apple/xnu (xnu-2050.22.13): bsd/vfs/vfs_syscalls.c
1 /*
2 * Copyright (c) 1995-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <machine/cons.h>
103 #include <machine/limits.h>
104 #include <miscfs/specfs/specdev.h>
105
106 #include <security/audit/audit.h>
107 #include <bsm/audit_kevents.h>
108
109 #include <mach/mach_types.h>
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/task.h>
113
114 #include <vm/vm_pageout.h>
115
116 #include <libkern/OSAtomic.h>
117 #include <pexpert/pexpert.h>
118
119 #if CONFIG_MACF
120 #include <security/mac.h>
121 #include <security/mac_framework.h>
122 #endif
123
124 #if CONFIG_FSE
125 #define GET_PATH(x) \
126 (x) = get_pathbuff();
127 #define RELEASE_PATH(x) \
128 release_pathbuff(x);
129 #else
130 #define GET_PATH(x) \
131 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
132 #define RELEASE_PATH(x) \
133 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
134 #endif /* CONFIG_FSE */
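/*
 * Illustrative sketch (not part of the build): callers in this file pair
 * these macros around building a path buffer, e.g.
 *
 *	char *pathbuf;
 *	GET_PATH(pathbuf);
 *	... build a path of at most MAXPATHLEN bytes into pathbuf ...
 *	RELEASE_PATH(pathbuf);
 */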
135
136 /* struct for checkdirs iteration */
137 struct cdirargs {
138 vnode_t olddp;
139 vnode_t newdp;
140 };
141 /* callback for checkdirs iteration */
142 static int checkdirs_callback(proc_t p, void * arg);
143
144 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
145 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
146 void enablequotas(struct mount *mp, vfs_context_t ctx);
147 static int getfsstat_callback(mount_t mp, void * arg);
148 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
149 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
150 static int sync_callback(mount_t, void *);
151 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
152 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
153 boolean_t partial_copy);
154 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
155 user_addr_t bufp);
156 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
157 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
158 struct componentname *cnp, user_addr_t fsmountargs,
159 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
160 vfs_context_t ctx);
161 void vfs_notify_mount(vnode_t pdvp);
162
163 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
164
165 #ifdef CONFIG_IMGSRC_ACCESS
166 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
167 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
168 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
169 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
170 static void mount_end_update(mount_t mp);
171 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
172 #endif /* CONFIG_IMGSRC_ACCESS */
173
174 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
175
176 __private_extern__
177 int sync_internal(void);
178
179 __private_extern__
180 int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *);
181
182 __private_extern__
183 int unlink1(vfs_context_t, struct nameidata *, int);
184
185 /*
186  * incremented each time a mount or unmount operation occurs;
187 * used to invalidate the cached value of the rootvp in the
188 * mount structure utilized by cache_lookup_path
189 */
190 uint32_t mount_generation = 0;
191
192 /* counts number of mount and unmount operations */
193 unsigned int vfs_nummntops=0;
194
195 extern struct fileops vnops;
196 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
197
198
199 /*
200 * Virtual File System System Calls
201 */
202
203 #if NFSCLIENT
204 /*
205 * Private in-kernel mounting spi (NFS only, not exported)
206 */
207 __private_extern__
208 boolean_t
209 vfs_iskernelmount(mount_t mp)
210 {
211 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
212 }
213
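/*
 * In-kernel mount entry point (used by NFS).  If no covered vnode is
 * supplied, look it (and its parent) up from 'path'; otherwise trust the
 * caller's vnodes and just borrow 'path' for the component name.  The
 * real work is done by mount_common() with kernelmount == TRUE.
 */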
214 __private_extern__
215 int
216 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
217 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
218 {
219 struct nameidata nd;
220 boolean_t did_namei;
221 int error;
222
223 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
224 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
225
226 /*
227 * Get the vnode to be covered if it's not supplied
228 */
229 if (vp == NULLVP) {
230 error = namei(&nd);
231 if (error)
232 return (error);
233 vp = nd.ni_vp;
234 pvp = nd.ni_dvp;
235 did_namei = TRUE;
236 } else {
237 char *pnbuf = CAST_DOWN(char *, path);
238
239 nd.ni_cnd.cn_pnbuf = pnbuf;
240 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
241 did_namei = FALSE;
242 }
243
244 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
245 syscall_flags, kern_flags, NULL, TRUE, ctx);
246
247 if (did_namei) {
248 vnode_put(vp);
249 vnode_put(pvp);
250 nameidone(&nd);
251 }
252
253 return (error);
254 }
255 #endif /* NFSCLIENT */
256
257 /*
258 * Mount a file system.
259 */
260 /* ARGSUSED */
261 int
262 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
263 {
264 struct __mac_mount_args muap;
265
266 muap.type = uap->type;
267 muap.path = uap->path;
268 muap.flags = uap->flags;
269 muap.data = uap->data;
270 muap.mac_p = USER_ADDR_NULL;
271 return (__mac_mount(p, &muap, retval));
272 }
273
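/*
 * Notify interested parties that a new mount has appeared: signal a
 * VQ_MOUNT vfs event and post NOTE_WRITE on the covered vnode's parent
 * directory so kqueue watchers of that directory see the change.
 */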
274 void
275 vfs_notify_mount(vnode_t pdvp)
276 {
277 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
278 lock_vnode_and_post(pdvp, NOTE_WRITE);
279 }
280
281 /*
282 * __mac_mount:
283 * Mount a file system taking into account MAC label behavior.
284 * See mount(2) man page for more information
285 *
286 * Parameters: p Process requesting the mount
287 * uap User argument descriptor (see below)
288 * retval (ignored)
289 *
290 * Indirect: uap->type Filesystem type
291 * uap->path Path to mount
292 * uap->data Mount arguments
293 * uap->mac_p MAC info
294 * uap->flags Mount flags
295 *
296 *
297 * Returns: 0 Success
298 * !0 Not success
299 */
300 boolean_t root_fs_upgrade_try = FALSE;
301
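/*
 * Userland sketch (illustrative only, not part of this file): the plain
 * mount(2) path above is reached with something like the following, where
 * struct hfs_mount_args is an assumed fs-specific argument layout:
 *
 *	#include <sys/mount.h>
 *
 *	struct hfs_mount_args args = { .fspec = "/dev/disk1s2" };
 *	if (mount("hfs", "/Volumes/Data", MNT_RDONLY, &args) == -1)
 *		perror("mount");
 */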
302 int
303 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
304 {
305 vnode_t pvp, vp;
306 vfs_context_t ctx = vfs_context_current();
307 char fstypename[MFSNAMELEN];
308 struct nameidata nd;
309 size_t dummy=0;
310 char *labelstr = NULL;
311 int flags = uap->flags;
312 int error;
313 boolean_t is_64bit = IS_64BIT_PROCESS(p);
314
315 /*
316 * Get the fs type name from user space
317 */
318 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
319 if (error)
320 return (error);
321
322 /*
323 * Get the vnode to be covered
324 */
325 NDINIT(&nd, LOOKUP, OP_MOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT,
326 UIO_USERSPACE, uap->path, ctx);
327 error = namei(&nd);
328 if (error)
329 return (error);
330 vp = nd.ni_vp;
331 pvp = nd.ni_dvp;
332
333 #ifdef CONFIG_IMGSRC_ACCESS
334 /* Mounting image source cannot be batched with other operations */
335 if (flags == MNT_IMGSRC_BY_INDEX) {
336 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
337 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
338 goto out;
339 }
340 #endif /* CONFIG_IMGSRC_ACCESS */
341
342 #if CONFIG_MACF
343 /*
344 * Get the label string (if any) from user space
345 */
346 if (uap->mac_p != USER_ADDR_NULL) {
347 struct user_mac mac;
348 size_t ulen = 0;
349
350 if (is_64bit) {
351 struct user64_mac mac64;
352 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
353 mac.m_buflen = mac64.m_buflen;
354 mac.m_string = mac64.m_string;
355 } else {
356 struct user32_mac mac32;
357 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
358 mac.m_buflen = mac32.m_buflen;
359 mac.m_string = mac32.m_string;
360 }
361 if (error)
362 goto out;
363 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
364 (mac.m_buflen < 2)) {
365 error = EINVAL;
366 goto out;
367 }
368 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
369 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
370 if (error) {
371 goto out;
372 }
373 AUDIT_ARG(mac_string, labelstr);
374 }
375 #endif /* CONFIG_MACF */
376
377 AUDIT_ARG(fflags, flags);
378
379 if ((vp->v_flag & VROOT) &&
380 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
381 flags |= MNT_UPDATE;
382 /*
383 * See 7392553 for more details on why this check exists.
384 * Suffice to say: If this check is ON and something tries
385 * to mount the rootFS RW, we'll turn off the codesign
386 * bitmap optimization.
387 */
388 #if CHECK_CS_VALIDATION_BITMAP
389 if ( !(flags & MNT_RDONLY) ) {
390 root_fs_upgrade_try = TRUE;
391 }
392 #endif
393 }
394
395 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
396 labelstr, FALSE, ctx);
397 out:
398 #if CONFIG_MACF
399 if (labelstr)
400 FREE(labelstr, M_MACTEMP);
401 #endif /* CONFIG_MACF */
402
403 vnode_put(vp);
404 vnode_put(pvp);
405 nameidone(&nd);
406
407 return (error);
408 }
409
410 /*
411 * common mount implementation (final stage of mounting)
412  *
413  * Arguments:
414  *  fstypename file system type (i.e., its vfs name)
415 * pvp parent of covered vnode
416 * vp covered vnode
417 * cnp component name (ie path) of covered vnode
418 * flags generic mount flags
419 * fsmountargs file system specific data
420 * labelstr optional MAC label
421 * kernelmount TRUE for mounts initiated from inside the kernel
422 * ctx caller's context
423 */
424 static int
425 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
426 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
427 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
428 {
429 struct vnode *devvp = NULLVP;
430 struct vnode *device_vnode = NULLVP;
431 #if CONFIG_MACF
432 struct vnode *rvp;
433 #endif
434 struct mount *mp;
435 struct vfstable *vfsp = (struct vfstable *)0;
436 struct proc *p = vfs_context_proc(ctx);
437 int error, flag = 0;
438 user_addr_t devpath = USER_ADDR_NULL;
439 int ronly = 0;
440 int mntalloc = 0;
441 boolean_t vfsp_ref = FALSE;
442 boolean_t is_rwlock_locked = FALSE;
443 boolean_t did_rele = FALSE;
444 boolean_t have_usecount = FALSE;
445
446 /*
447 * Process an update for an existing mount
448 */
449 if (flags & MNT_UPDATE) {
450 if ((vp->v_flag & VROOT) == 0) {
451 error = EINVAL;
452 goto out1;
453 }
454 mp = vp->v_mount;
455
456 /* unmount in progress; return error */
457 mount_lock_spin(mp);
458 if (mp->mnt_lflag & MNT_LUNMOUNT) {
459 mount_unlock(mp);
460 error = EBUSY;
461 goto out1;
462 }
463 mount_unlock(mp);
464 lck_rw_lock_exclusive(&mp->mnt_rwlock);
465 is_rwlock_locked = TRUE;
466 /*
467 * We only allow the filesystem to be reloaded if it
468 * is currently mounted read-only.
469 */
470 if ((flags & MNT_RELOAD) &&
471 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
472 error = ENOTSUP;
473 goto out1;
474 }
475
476 /*
477 * If content protection is enabled, update mounts are not
478 * allowed to turn it off.
479 */
480 if ((mp->mnt_flag & MNT_CPROTECT) &&
481 ((flags & MNT_CPROTECT) == 0)) {
482 error = EINVAL;
483 goto out1;
484 }
485
486 #ifdef CONFIG_IMGSRC_ACCESS
487 /* Can't downgrade the backer of the root FS */
488 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
489 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
490 error = ENOTSUP;
491 goto out1;
492 }
493 #endif /* CONFIG_IMGSRC_ACCESS */
494
495 /*
496 * Only root, or the user that did the original mount is
497 * permitted to update it.
498 */
499 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
500 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
501 goto out1;
502 }
503 #if CONFIG_MACF
504 error = mac_mount_check_remount(ctx, mp);
505 if (error != 0) {
506 goto out1;
507 }
508 #endif
509 /*
510 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
511 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
512 */
513 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
514 flags |= MNT_NOSUID | MNT_NODEV;
515 if (mp->mnt_flag & MNT_NOEXEC)
516 flags |= MNT_NOEXEC;
517 }
518 flag = mp->mnt_flag;
519
520
521
522 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
523
524 vfsp = mp->mnt_vtable;
525 goto update;
526 }
527 /*
528 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
529 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
530 */
531 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
532 flags |= MNT_NOSUID | MNT_NODEV;
533 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
534 flags |= MNT_NOEXEC;
535 }
536
537 /* XXXAUDIT: Should we capture the type on the error path as well? */
538 AUDIT_ARG(text, fstypename);
539 mount_list_lock();
540 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
541 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
542 vfsp->vfc_refcount++;
543 vfsp_ref = TRUE;
544 break;
545 }
546 mount_list_unlock();
547 if (vfsp == NULL) {
548 error = ENODEV;
549 goto out1;
550 }
551
552 /*
553 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
554 */
555 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
556 error = EINVAL; /* unsupported request */
557 goto out1;
558 }
559
560 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
561 if (error != 0) {
562 goto out1;
563 }
564
565 /*
566 * Allocate and initialize the filesystem (mount_t)
567 */
568 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
569 M_MOUNT, M_WAITOK);
570 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
571 mntalloc = 1;
572
573 /* Initialize the default IO constraints */
574 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
575 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
576 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
577 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
578 mp->mnt_devblocksize = DEV_BSIZE;
579 mp->mnt_alignmentmask = PAGE_MASK;
580 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
581 mp->mnt_ioscale = 1;
582 mp->mnt_ioflags = 0;
583 mp->mnt_realrootvp = NULLVP;
584 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
585
586 TAILQ_INIT(&mp->mnt_vnodelist);
587 TAILQ_INIT(&mp->mnt_workerqueue);
588 TAILQ_INIT(&mp->mnt_newvnodes);
589 mount_lock_init(mp);
590 lck_rw_lock_exclusive(&mp->mnt_rwlock);
591 is_rwlock_locked = TRUE;
592 mp->mnt_op = vfsp->vfc_vfsops;
593 mp->mnt_vtable = vfsp;
594 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
595 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
596 strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
597 strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
598 mp->mnt_vnodecovered = vp;
599 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
600 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
601 mp->mnt_devbsdunit = 0;
602
603 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
604 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
605
606 #if NFSCLIENT
607 if (kernelmount)
608 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
609 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
610 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
611 #endif /* NFSCLIENT */
612
613 update:
614 /*
615 * Set the mount level flags.
616 */
617 if (flags & MNT_RDONLY)
618 mp->mnt_flag |= MNT_RDONLY;
619 else if (mp->mnt_flag & MNT_RDONLY) {
620 // disallow read/write upgrades of file systems that
621 // had the TYPENAME_OVERRIDE feature set.
622 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
623 error = EPERM;
624 goto out1;
625 }
626 mp->mnt_kern_flag |= MNTK_WANTRDWR;
627 }
628 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
629 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
630 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
631 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
632 MNT_QUARANTINE | MNT_CPROTECT);
633 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
634 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
635 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
636 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
637 MNT_QUARANTINE | MNT_CPROTECT);
638
639 #if CONFIG_MACF
640 if (flags & MNT_MULTILABEL) {
641 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
642 error = EINVAL;
643 goto out1;
644 }
645 mp->mnt_flag |= MNT_MULTILABEL;
646 }
647 #endif
648 /*
649 * Process device path for local file systems if requested
650 */
651 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
652 if (vfs_context_is64bit(ctx)) {
653 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
654 goto out1;
655 fsmountargs += sizeof(devpath);
656 } else {
657 user32_addr_t tmp;
658 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
659 goto out1;
660 /* munge into LP64 addr */
661 devpath = CAST_USER_ADDR_T(tmp);
662 fsmountargs += sizeof(tmp);
663 }
664
665 /* Lookup device and authorize access to it */
666 if ((devpath)) {
667 struct nameidata nd;
668
669 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
670 if ( (error = namei(&nd)) )
671 goto out1;
672
673 strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
674 devvp = nd.ni_vp;
675
676 nameidone(&nd);
677
678 if (devvp->v_type != VBLK) {
679 error = ENOTBLK;
680 goto out2;
681 }
682 if (major(devvp->v_rdev) >= nblkdev) {
683 error = ENXIO;
684 goto out2;
685 }
686 /*
687 * If mount by non-root, then verify that user has necessary
688 * permissions on the device.
689 */
690 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
691 mode_t accessmode = KAUTH_VNODE_READ_DATA;
692
693 if ((mp->mnt_flag & MNT_RDONLY) == 0)
694 accessmode |= KAUTH_VNODE_WRITE_DATA;
695 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
696 goto out2;
697 }
698 }
699 /* On first mount, preflight and open device */
700 if (devpath && ((flags & MNT_UPDATE) == 0)) {
701 if ( (error = vnode_ref(devvp)) )
702 goto out2;
703 /*
704 * Disallow multiple mounts of the same device.
705 * Disallow mounting of a device that is currently in use
706 * (except for root, which might share swap device for miniroot).
707 * Flush out any old buffers remaining from a previous use.
708 */
709 if ( (error = vfs_mountedon(devvp)) )
710 goto out3;
711
712 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
713 error = EBUSY;
714 goto out3;
715 }
716 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
717 error = ENOTBLK;
718 goto out3;
719 }
720 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
721 goto out3;
722
723 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
724 #if CONFIG_MACF
725 error = mac_vnode_check_open(ctx,
726 devvp,
727 ronly ? FREAD : FREAD|FWRITE);
728 if (error)
729 goto out3;
730 #endif /* MAC */
731 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
732 goto out3;
733
734 mp->mnt_devvp = devvp;
735 device_vnode = devvp;
736
737 } else if ((mp->mnt_flag & MNT_RDONLY) &&
738 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
739 (device_vnode = mp->mnt_devvp)) {
740 dev_t dev;
741 int maj;
742 /*
743 * If upgrade to read-write by non-root, then verify
744 * that user has necessary permissions on the device.
745 */
746 vnode_getalways(device_vnode);
747
748 if (suser(vfs_context_ucred(ctx), NULL) &&
749 (error = vnode_authorize(device_vnode, NULL,
750 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
751 ctx)) != 0) {
752 vnode_put(device_vnode);
753 goto out2;
754 }
755
756 /* Tell the device that we're upgrading */
757 dev = (dev_t)device_vnode->v_rdev;
758 maj = major(dev);
759
760 if ((u_int)maj >= (u_int)nblkdev)
761 panic("Volume mounted on a device with invalid major number.");
762
763 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
764 vnode_put(device_vnode);
765 device_vnode = NULLVP;
766 if (error != 0) {
767 goto out2;
768 }
769 }
770 }
771 #if CONFIG_MACF
772 if ((flags & MNT_UPDATE) == 0) {
773 mac_mount_label_init(mp);
774 mac_mount_label_associate(ctx, mp);
775 }
776 if (labelstr) {
777 if ((flags & MNT_UPDATE) != 0) {
778 error = mac_mount_check_label_update(ctx, mp);
779 if (error != 0)
780 goto out3;
781 }
782 }
783 #endif
784 /*
785 * Mount the filesystem.
786 */
787 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
788
789 if (flags & MNT_UPDATE) {
790 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
791 mp->mnt_flag &= ~MNT_RDONLY;
792 mp->mnt_flag &=~
793 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
794 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
795 if (error)
796 mp->mnt_flag = flag; /* restore flag value */
797 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
798 lck_rw_done(&mp->mnt_rwlock);
799 is_rwlock_locked = FALSE;
800 if (!error)
801 enablequotas(mp, ctx);
802 goto exit;
803 }
804
805 /*
806 * Put the new filesystem on the mount list after root.
807 */
808 if (error == 0) {
809 struct vfs_attr vfsattr;
810 #if CONFIG_MACF
811 if (vfs_flags(mp) & MNT_MULTILABEL) {
812 error = VFS_ROOT(mp, &rvp, ctx);
813 if (error) {
814 printf("%s() VFS_ROOT returned %d\n", __func__, error);
815 goto out3;
816 }
817 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
818 /*
819 * drop reference provided by VFS_ROOT
820 */
821 vnode_put(rvp);
822
823 if (error)
824 goto out3;
825 }
826 #endif /* MAC */
827
828 vnode_lock_spin(vp);
829 CLR(vp->v_flag, VMOUNT);
830 vp->v_mountedhere = mp;
831 vnode_unlock(vp);
832
833 /*
834 * taking the name_cache_lock exclusively will
835 * ensure that everyone is out of the fast path who
836 * might be trying to use a now stale copy of
837 * vp->v_mountedhere->mnt_realrootvp
838 * bumping mount_generation causes the cached values
839 * to be invalidated
840 */
841 name_cache_lock();
842 mount_generation++;
843 name_cache_unlock();
844
845 error = vnode_ref(vp);
846 if (error != 0) {
847 goto out4;
848 }
849
850 have_usecount = TRUE;
851
852 error = checkdirs(vp, ctx);
853 if (error != 0) {
854 /* Unmount the filesystem as cdir/rdirs cannot be updated */
855 goto out4;
856 }
857 /*
858 * there is no cleanup code here, so the call is cast to void;
859 * we need to revisit this
860 */
861 (void)VFS_START(mp, 0, ctx);
862
863 if (mount_list_add(mp) != 0) {
864 /*
865 * The system is shutting down trying to umount
866 * everything, so fail with a plausible errno.
867 */
868 error = EBUSY;
869 goto out4;
870 }
871 lck_rw_done(&mp->mnt_rwlock);
872 is_rwlock_locked = FALSE;
873
874 /* Check if this mounted file system supports EAs or named streams. */
875 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
876 VFSATTR_INIT(&vfsattr);
877 VFSATTR_WANTED(&vfsattr, f_capabilities);
878 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
879 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
880 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
881 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
882 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
883 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
884 }
885 #if NAMEDSTREAMS
886 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
887 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
888 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
889 }
890 #endif
891 /* Check if this file system supports path from id lookups. */
892 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
893 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
894 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
895 } else if (mp->mnt_flag & MNT_DOVOLFS) {
896 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
897 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
898 }
899 }
900 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
901 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
902 }
903 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
904 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
905 }
906 /* increment the operations count */
907 OSAddAtomic(1, &vfs_nummntops);
908 enablequotas(mp, ctx);
909
910 if (device_vnode) {
911 device_vnode->v_specflags |= SI_MOUNTEDON;
912
913 /*
914 * cache the IO attributes for the underlying physical media...
915 * an error return indicates the underlying driver doesn't
916 * support all the queries necessary... however, reasonable
917 * defaults will have been set, so no reason to bail or care
918 */
919 vfs_init_io_attributes(device_vnode, mp);
920 }
921
922 /* Now that mount is setup, notify the listeners */
923 vfs_notify_mount(pvp);
924 } else {
925 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
926 if (mp->mnt_vnodelist.tqh_first != NULL) {
927 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
928 mp->mnt_vtable->vfc_name, error);
929 }
930
931 vnode_lock_spin(vp);
932 CLR(vp->v_flag, VMOUNT);
933 vnode_unlock(vp);
934 mount_list_lock();
935 mp->mnt_vtable->vfc_refcount--;
936 mount_list_unlock();
937
938 if (device_vnode ) {
939 vnode_rele(device_vnode);
940 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
941 }
942 lck_rw_done(&mp->mnt_rwlock);
943 is_rwlock_locked = FALSE;
944
945 /*
946 * if we get here, we have a mount structure that needs to be freed,
947 * but since the coveredvp hasn't yet been updated to point at it,
948 * no need to worry about other threads holding a crossref on this mp
949 * so it's ok to just free it
950 */
951 mount_lock_destroy(mp);
952 #if CONFIG_MACF
953 mac_mount_label_destroy(mp);
954 #endif
955 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
956 }
957 exit:
958 /*
959 * drop I/O count on the device vp if there was one
960 */
961 if (devpath && devvp)
962 vnode_put(devvp);
963
964 return(error);
965
966 /* Error condition exits */
967 out4:
968 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
969
970 /*
971 * If the mount has been placed on the covered vp,
972 * it may have been discovered by now, so we have
973 * to treat this just like an unmount
974 */
975 mount_lock_spin(mp);
976 mp->mnt_lflag |= MNT_LDEAD;
977 mount_unlock(mp);
978
979 if (device_vnode != NULLVP) {
980 vnode_rele(device_vnode);
981 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
982 ctx);
983 did_rele = TRUE;
984 }
985
986 vnode_lock_spin(vp);
987
988 mp->mnt_crossref++;
989 vp->v_mountedhere = (mount_t) 0;
990
991 vnode_unlock(vp);
992
993 if (have_usecount) {
994 vnode_rele(vp);
995 }
996 out3:
997 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
998 vnode_rele(devvp);
999 out2:
1000 if (devpath && devvp)
1001 vnode_put(devvp);
1002 out1:
1003 /* Release mnt_rwlock only when it was taken */
1004 if (is_rwlock_locked == TRUE) {
1005 lck_rw_done(&mp->mnt_rwlock);
1006 }
1007
1008 if (mntalloc) {
1009 if (mp->mnt_crossref)
1010 mount_dropcrossref(mp, vp, 0);
1011 else {
1012 mount_lock_destroy(mp);
1013 #if CONFIG_MACF
1014 mac_mount_label_destroy(mp);
1015 #endif
1016 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1017 }
1018 }
1019 if (vfsp_ref) {
1020 mount_list_lock();
1021 vfsp->vfc_refcount--;
1022 mount_list_unlock();
1023 }
1024
1025 return(error);
1026 }
1027
1028 /*
1029 * Flush in-core data, check for competing mount attempts,
1030 * and set VMOUNT
1031 */
1032 int
1033 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1034 {
1035 struct vnode_attr va;
1036 int error;
1037
1038 if (!skip_auth) {
1039 /*
1040 * If the user is not root, ensure that they own the directory
1041 * onto which we are attempting to mount.
1042 */
1043 VATTR_INIT(&va);
1044 VATTR_WANTED(&va, va_uid);
1045 if ((error = vnode_getattr(vp, &va, ctx)) ||
1046 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1047 (!vfs_context_issuser(ctx)))) {
1048 error = EPERM;
1049 goto out;
1050 }
1051 }
1052
1053 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1054 goto out;
1055
1056 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1057 goto out;
1058
1059 if (vp->v_type != VDIR) {
1060 error = ENOTDIR;
1061 goto out;
1062 }
1063
1064 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1065 error = EBUSY;
1066 goto out;
1067 }
1068
1069 #if CONFIG_MACF
1070 error = mac_mount_check_mount(ctx, vp,
1071 cnp, fsname);
1072 if (error != 0)
1073 goto out;
1074 #endif
1075
1076 vnode_lock_spin(vp);
1077 SET(vp->v_flag, VMOUNT);
1078 vnode_unlock(vp);
1079
1080 out:
1081 return error;
1082 }
1083
1084 #if CONFIG_IMGSRC_ACCESS
1085
1086 #if DEBUG
1087 #define IMGSRC_DEBUG(args...) printf(args)
1088 #else
1089 #define IMGSRC_DEBUG(args...) do { } while(0)
1090 #endif
1091
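/*
 * Look up the user-supplied device path, check that it names the same
 * block device that already backs 'mp', verify access for non-root
 * callers, and update f_mntfromname to the new path.  On success the
 * device vnode is returned in *devvpp with an iocount held.
 */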
1092 static int
1093 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1094 {
1095 struct nameidata nd;
1096 vnode_t vp, realdevvp;
1097 mode_t accessmode;
1098 int error;
1099
1100 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1101 if ( (error = namei(&nd)) ) {
1102 IMGSRC_DEBUG("namei() failed with %d\n", error);
1103 return error;
1104 }
1105
1106 vp = nd.ni_vp;
1107
1108 if (!vnode_isblk(vp)) {
1109 IMGSRC_DEBUG("Not block device.\n");
1110 error = ENOTBLK;
1111 goto out;
1112 }
1113
1114 realdevvp = mp->mnt_devvp;
1115 if (realdevvp == NULLVP) {
1116 IMGSRC_DEBUG("No device backs the mount.\n");
1117 error = ENXIO;
1118 goto out;
1119 }
1120
1121 error = vnode_getwithref(realdevvp);
1122 if (error != 0) {
1123 IMGSRC_DEBUG("Couldn't get iocount on device.\n");
1124 goto out;
1125 }
1126
1127 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1128 IMGSRC_DEBUG("Wrong dev_t.\n");
1129 error = ENXIO;
1130 goto out1;
1131 }
1132
1133 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1134
1135 /*
1136 * If mount by non-root, then verify that user has necessary
1137 * permissions on the device.
1138 */
1139 if (!vfs_context_issuser(ctx)) {
1140 accessmode = KAUTH_VNODE_READ_DATA;
1141 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1142 accessmode |= KAUTH_VNODE_WRITE_DATA;
1143 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1144 IMGSRC_DEBUG("Access denied.\n");
1145 goto out1;
1146 }
1147 }
1148
1149 *devvpp = vp;
1150
1151 out1:
1152 vnode_put(realdevvp);
1153 out:
1154 nameidone(&nd);
1155 if (error) {
1156 vnode_put(vp);
1157 }
1158
1159 return error;
1160 }
1161
1162 /*
1163 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1164 * and call checkdirs()
1165 */
1166 static int
1167 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1168 {
1169 int error;
1170
1171 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1172
1173 vnode_lock_spin(vp);
1174 CLR(vp->v_flag, VMOUNT);
1175 vp->v_mountedhere = mp;
1176 vnode_unlock(vp);
1177
1178 /*
1179 * taking the name_cache_lock exclusively will
1180 * ensure that everyone is out of the fast path who
1181 * might be trying to use a now stale copy of
1182 * vp->v_mountedhere->mnt_realrootvp
1183 * bumping mount_generation causes the cached values
1184 * to be invalidated
1185 */
1186 name_cache_lock();
1187 mount_generation++;
1188 name_cache_unlock();
1189
1190 error = vnode_ref(vp);
1191 if (error != 0) {
1192 goto out;
1193 }
1194
1195 error = checkdirs(vp, ctx);
1196 if (error != 0) {
1197 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1198 vnode_rele(vp);
1199 goto out;
1200 }
1201
1202 out:
1203 if (error != 0) {
1204 mp->mnt_vnodecovered = NULLVP;
1205 }
1206 return error;
1207 }
1208
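/* Back out place_mount_and_checkdirs(): drop the usecount and detach mp from vp. */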
1209 static void
1210 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1211 {
1212 vnode_rele(vp);
1213 vnode_lock_spin(vp);
1214 vp->v_mountedhere = (mount_t)NULL;
1215 vnode_unlock(vp);
1216
1217 mp->mnt_vnodecovered = NULLVP;
1218 }
1219
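/*
 * Authorize and begin an update-style operation on 'mp': fail if an
 * unmount is in progress, take the mount rwlock exclusively, refuse
 * MNT_RELOAD on read/write mounts, and require the caller to be root or
 * the original mounter (plus the MAC remount check).  On success the
 * rwlock is left held; mount_end_update() releases it.
 */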
1220 static int
1221 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1222 {
1223 int error;
1224
1225 /* unmount in progress; return error */
1226 mount_lock_spin(mp);
1227 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1228 mount_unlock(mp);
1229 return EBUSY;
1230 }
1231 mount_unlock(mp);
1232 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1233
1234 /*
1235 * We only allow the filesystem to be reloaded if it
1236 * is currently mounted read-only.
1237 */
1238 if ((flags & MNT_RELOAD) &&
1239 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1240 error = ENOTSUP;
1241 goto out;
1242 }
1243
1244 /*
1245 * Only root, or the user that did the original mount is
1246 * permitted to update it.
1247 */
1248 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1249 (!vfs_context_issuser(ctx))) {
1250 error = EPERM;
1251 goto out;
1252 }
1253 #if CONFIG_MACF
1254 error = mac_mount_check_remount(ctx, mp);
1255 if (error != 0) {
1256 goto out;
1257 }
1258 #endif
1259
1260 out:
1261 if (error) {
1262 lck_rw_done(&mp->mnt_rwlock);
1263 }
1264
1265 return error;
1266 }
1267
1268 static void
1269 mount_end_update(mount_t mp)
1270 {
1271 lck_rw_done(&mp->mnt_rwlock);
1272 }
1273
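/*
 * Return the saved imageboot root vnode for the given nesting height,
 * with an iocount held, or an error if the height is out of range or
 * the vnode is no longer available.
 */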
1274 static int
1275 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1276 {
1277 vnode_t vp;
1278
1279 if (height >= MAX_IMAGEBOOT_NESTING) {
1280 return EINVAL;
1281 }
1282
1283 vp = imgsrc_rootvnodes[height];
1284 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1285 *rvpp = vp;
1286 return 0;
1287 } else {
1288 return ENOENT;
1289 }
1290 }
1291
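/*
 * Move an already-mounted imageboot source so it is covered by 'vp':
 * find its root vnode (by nesting height when by_index is set, otherwise
 * the legacy single-level layout), authorize the update, validate the
 * caller-supplied fs name and device path, place the mount over 'vp',
 * and add it to the mount list.  Only root may do this, and a mount can
 * only be moved once (MNTK_HAS_MOVED).
 */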
1292 static int
1293 relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1294 const char *fsname, vfs_context_t ctx,
1295 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1296 {
1297 int error;
1298 mount_t mp;
1299 boolean_t placed = FALSE;
1300 vnode_t devvp = NULLVP;
1301 struct vfstable *vfsp;
1302 user_addr_t devpath;
1303 char *old_mntonname;
1304 vnode_t rvp;
1305 uint32_t height;
1306 uint32_t flags;
1307
1308 /* If we didn't imageboot, nothing to move */
1309 if (imgsrc_rootvnodes[0] == NULLVP) {
1310 return EINVAL;
1311 }
1312
1313 /* Only root can do this */
1314 if (!vfs_context_issuser(ctx)) {
1315 return EPERM;
1316 }
1317
1318 IMGSRC_DEBUG("looking for root vnode.\n");
1319
1320 /*
1321 * Get root vnode of filesystem we're moving.
1322 */
1323 if (by_index) {
1324 if (is64bit) {
1325 struct user64_mnt_imgsrc_args mia64;
1326 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1327 if (error != 0) {
1328 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1329 return error;
1330 }
1331
1332 height = mia64.mi_height;
1333 flags = mia64.mi_flags;
1334 devpath = mia64.mi_devpath;
1335 } else {
1336 struct user32_mnt_imgsrc_args mia32;
1337 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1338 if (error != 0) {
1339 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1340 return error;
1341 }
1342
1343 height = mia32.mi_height;
1344 flags = mia32.mi_flags;
1345 devpath = mia32.mi_devpath;
1346 }
1347 } else {
1348 /*
1349 * For binary compatibility--assumes one level of nesting.
1350 */
1351 if (is64bit) {
1352 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1353 return error;
1354 } else {
1355 user32_addr_t tmp;
1356 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1357 return error;
1358
1359 /* munge into LP64 addr */
1360 devpath = CAST_USER_ADDR_T(tmp);
1361 }
1362
1363 height = 0;
1364 flags = 0;
1365 }
1366
1367 if (flags != 0) {
1368 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1369 return EINVAL;
1370 }
1371
1372 error = get_imgsrc_rootvnode(height, &rvp);
1373 if (error != 0) {
1374 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1375 return error;
1376 }
1377
1378 IMGSRC_DEBUG("got root vnode.\n");
1379
1380 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1381
1382 /* Can only move once */
1383 mp = vnode_mount(rvp);
1384 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1385 IMGSRC_DEBUG("Already moved.\n");
1386 error = EBUSY;
1387 goto out0;
1388 }
1389
1390 IMGSRC_DEBUG("Starting update.\n");
1391
1392 /* Get exclusive rwlock on mount, authorize update on mp */
1393 error = mount_begin_update(mp , ctx, 0);
1394 if (error != 0) {
1395 IMGSRC_DEBUG("Starting update failed with %d\n", error);
1396 goto out0;
1397 }
1398
1399 /*
1400 * It can only be moved once. Flag is set under the rwlock,
1401 * so we're now safe to proceed.
1402 */
1403 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1404 IMGSRC_DEBUG("Already moved [2]\n");
1405 goto out1;
1406 }
1407
1408
1409 IMGSRC_DEBUG("Preparing coveredvp.\n");
1410
1411 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1412 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1413 if (error != 0) {
1414 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1415 goto out1;
1416 }
1417
1418 IMGSRC_DEBUG("Covered vp OK.\n");
1419
1420 /* Sanity check the name caller has provided */
1421 vfsp = mp->mnt_vtable;
1422 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1423 IMGSRC_DEBUG("Wrong fs name.\n");
1424 error = EINVAL;
1425 goto out2;
1426 }
1427
1428 /* Check the device vnode and update mount-from name, for local filesystems */
1429 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1430 IMGSRC_DEBUG("Local, doing device validation.\n");
1431
1432 if (devpath != USER_ADDR_NULL) {
1433 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1434 if (error) {
1435 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1436 goto out2;
1437 }
1438
1439 vnode_put(devvp);
1440 }
1441 }
1442
1443 /*
1444 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1445 * and increment the name cache's mount generation
1446 */
1447
1448 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1449 error = place_mount_and_checkdirs(mp, vp, ctx);
1450 if (error != 0) {
1451 goto out2;
1452 }
1453
1454 placed = TRUE;
1455
1456 strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1457 strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1458
1459 /* Forbid future moves */
1460 mount_lock(mp);
1461 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1462 mount_unlock(mp);
1463
1464 /* Finally, add to mount list, completely ready to go */
1465 if (mount_list_add(mp) != 0) {
1466 /*
1467 * The system is shutting down trying to umount
1468 * everything, so fail with a plausible errno.
1469 */
1470 error = EBUSY;
1471 goto out3;
1472 }
1473
1474 mount_end_update(mp);
1475 vnode_put(rvp);
1476 FREE(old_mntonname, M_TEMP);
1477
1478 vfs_notify_mount(pvp);
1479
1480 return 0;
1481 out3:
1482 strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1483
1484 mount_lock(mp);
1485 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1486 mount_unlock(mp);
1487
1488 out2:
1489 /*
1490 * Placing the mp on the vnode clears VMOUNT,
1491 * so cleanup is different after that point
1492 */
1493 if (placed) {
1494 /* Rele the vp, clear VMOUNT and v_mountedhere */
1495 undo_place_on_covered_vp(mp, vp);
1496 } else {
1497 vnode_lock_spin(vp);
1498 CLR(vp->v_flag, VMOUNT);
1499 vnode_unlock(vp);
1500 }
1501 out1:
1502 mount_end_update(mp);
1503
1504 out0:
1505 vnode_put(rvp);
1506 FREE(old_mntonname, M_TEMP);
1507 return error;
1508 }
1509
1510 #endif /* CONFIG_IMGSRC_ACCESS */
1511
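/*
 * Enable disk quotas on an HFS mount when the per-type quota option
 * files exist at the volume root; errors are ignored so quota setup
 * never blocks an otherwise successful mount.
 */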
1512 void
1513 enablequotas(struct mount *mp, vfs_context_t ctx)
1514 {
1515 struct nameidata qnd;
1516 int type;
1517 char qfpath[MAXPATHLEN];
1518 const char *qfname = QUOTAFILENAME;
1519 const char *qfopsname = QUOTAOPSNAME;
1520 const char *qfextension[] = INITQFNAMES;
1521
1522 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1523 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1524 return;
1525 }
1526 /*
1527 * Enable filesystem disk quotas if necessary.
1528 * We ignore errors as this should not interfere with final mount
1529 */
1530 for (type=0; type < MAXQUOTAS; type++) {
1531 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1532 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1533 CAST_USER_ADDR_T(qfpath), ctx);
1534 if (namei(&qnd) != 0)
1535 continue; /* option file to trigger quotas is not present */
1536 vnode_put(qnd.ni_vp);
1537 nameidone(&qnd);
1538 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1539
1540 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1541 }
1542 return;
1543 }
1544
1545
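/*
 * Per-process callback for checkdirs(): if the process's current or root
 * directory is the vnode that was just covered (olddp), swap in the new
 * filesystem root (newdp), moving the usecount from the old vnode to the
 * new one.
 */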
1546 static int
1547 checkdirs_callback(proc_t p, void * arg)
1548 {
1549 struct cdirargs * cdrp = (struct cdirargs * )arg;
1550 vnode_t olddp = cdrp->olddp;
1551 vnode_t newdp = cdrp->newdp;
1552 struct filedesc *fdp;
1553 vnode_t tvp;
1554 vnode_t fdp_cvp;
1555 vnode_t fdp_rvp;
1556 int cdir_changed = 0;
1557 int rdir_changed = 0;
1558
1559 /*
1560 * XXX Also needs to iterate each thread in the process to see if it
1561 * XXX is using a per-thread current working directory, and, if so,
1562 * XXX update that as well.
1563 */
1564
1565 proc_fdlock(p);
1566 fdp = p->p_fd;
1567 if (fdp == (struct filedesc *)0) {
1568 proc_fdunlock(p);
1569 return(PROC_RETURNED);
1570 }
1571 fdp_cvp = fdp->fd_cdir;
1572 fdp_rvp = fdp->fd_rdir;
1573 proc_fdunlock(p);
1574
1575 if (fdp_cvp == olddp) {
1576 vnode_ref(newdp);
1577 tvp = fdp->fd_cdir;
1578 fdp_cvp = newdp;
1579 cdir_changed = 1;
1580 vnode_rele(tvp);
1581 }
1582 if (fdp_rvp == olddp) {
1583 vnode_ref(newdp);
1584 tvp = fdp->fd_rdir;
1585 fdp_rvp = newdp;
1586 rdir_changed = 1;
1587 vnode_rele(tvp);
1588 }
1589 if (cdir_changed || rdir_changed) {
1590 proc_fdlock(p);
1591 fdp->fd_cdir = fdp_cvp;
1592 fdp->fd_rdir = fdp_rvp;
1593 proc_fdunlock(p);
1594 }
1595 return(PROC_RETURNED);
1596 }
1597
1598
1599
1600 /*
1601 * Scan all active processes to see if any of them have a current
1602 * or root directory onto which the new filesystem has just been
1603 * mounted. If so, replace them with the new mount point.
1604 */
1605 static int
1606 checkdirs(vnode_t olddp, vfs_context_t ctx)
1607 {
1608 vnode_t newdp;
1609 vnode_t tvp;
1610 int err;
1611 struct cdirargs cdr;
1612 struct uthread * uth = get_bsdthread_info(current_thread());
1613
1614 if (olddp->v_usecount == 1)
1615 return(0);
1616 if (uth != (struct uthread *)0)
1617 uth->uu_notrigger = 1;
1618 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1619 if (uth != (struct uthread *)0)
1620 uth->uu_notrigger = 0;
1621
1622 if (err != 0) {
1623 #if DIAGNOSTIC
1624 panic("mount: lost mount: error %d", err);
1625 #endif
1626 return(err);
1627 }
1628
1629 cdr.olddp = olddp;
1630 cdr.newdp = newdp;
1631 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1632 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1633
1634 if (rootvnode == olddp) {
1635 vnode_ref(newdp);
1636 tvp = rootvnode;
1637 rootvnode = newdp;
1638 vnode_rele(tvp);
1639 }
1640
1641 vnode_put(newdp);
1642 return(0);
1643 }
1644
1645 /*
1646 * Unmount a file system.
1647 *
1648 * Note: unmount takes a path to the vnode mounted on as argument,
1649 * not special file (as before).
1650 */
1651 /* ARGSUSED */
1652 int
1653 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1654 {
1655 vnode_t vp;
1656 struct mount *mp;
1657 int error;
1658 struct nameidata nd;
1659 vfs_context_t ctx = vfs_context_current();
1660
1661 NDINIT(&nd, LOOKUP, OP_UNMOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1662 UIO_USERSPACE, uap->path, ctx);
1663 error = namei(&nd);
1664 if (error)
1665 return (error);
1666 vp = nd.ni_vp;
1667 mp = vp->v_mount;
1668 nameidone(&nd);
1669
1670 #if CONFIG_MACF
1671 error = mac_mount_check_umount(ctx, mp);
1672 if (error != 0) {
1673 vnode_put(vp);
1674 return (error);
1675 }
1676 #endif
1677 /*
1678 * Must be the root of the filesystem
1679 */
1680 if ((vp->v_flag & VROOT) == 0) {
1681 vnode_put(vp);
1682 return (EINVAL);
1683 }
1684 mount_ref(mp, 0);
1685 vnode_put(vp);
1686 /* safedounmount consumes the mount ref */
1687 return (safedounmount(mp, uap->flags, ctx));
1688 }
1689
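/*
 * Unmount by fsid: look up the mount, trade the iterator reference for a
 * mount ref, and let safedounmount() (which consumes that ref) do the rest.
 */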
1690 int
1691 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1692 {
1693 mount_t mp;
1694
1695 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1696 if (mp == (mount_t)0) {
1697 return(ENOENT);
1698 }
1699 mount_ref(mp, 0);
1700 mount_iterdrop(mp);
1701 /* safedounmount consumes the mount ref */
1702 return(safedounmount(mp, flags, ctx));
1703 }
1704
1705
1706 /*
1707 * The mount struct comes with a mount ref which will be consumed.
1708 * Do the actual file system unmount, prevent some common foot shooting.
1709 */
1710 int
1711 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1712 {
1713 int error;
1714 proc_t p = vfs_context_proc(ctx);
1715
1716 /*
1717 * If the file system is not responding and MNT_NOBLOCK
1718 * is set and not a forced unmount then return EBUSY.
1719 */
1720 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1721 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1722 error = EBUSY;
1723 goto out;
1724 }
1725
1726 /*
1727 * Skip authorization if the mount is tagged as permissive and
1728 * this is not a forced-unmount attempt.
1729 */
1730 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1731 /*
1732 * Only root, or the user that did the original mount is
1733 * permitted to unmount this filesystem.
1734 */
1735 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1736 (error = suser(kauth_cred_get(), &p->p_acflag)))
1737 goto out;
1738 }
1739 /*
1740 * Don't allow unmounting the root file system.
1741 */
1742 if (mp->mnt_flag & MNT_ROOTFS) {
1743 error = EBUSY; /* the root is always busy */
1744 goto out;
1745 }
1746
1747 #ifdef CONFIG_IMGSRC_ACCESS
1748 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1749 error = EBUSY;
1750 goto out;
1751 }
1752 #endif /* CONFIG_IMGSRC_ACCESS */
1753
1754 return (dounmount(mp, flags, 1, ctx));
1755
1756 out:
1757 mount_drop(mp, 0);
1758 return(error);
1759 }
1760
1761 /*
1762 * Do the actual file system unmount.
1763 */
1764 int
1765 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1766 {
1767 vnode_t coveredvp = (vnode_t)0;
1768 int error;
1769 int needwakeup = 0;
1770 int forcedunmount = 0;
1771 int lflags = 0;
1772 struct vnode *devvp = NULLVP;
1773 #if CONFIG_TRIGGERS
1774 int did_vflush = 0;
1775 #endif /* CONFIG_TRIGGERS */
1776
1777 if (flags & MNT_FORCE)
1778 forcedunmount = 1;
1779
1780 mount_lock(mp);
1781 /* XXX post jaguar fix LK_DRAIN - then clean this up */
1782 if ((flags & MNT_FORCE)) {
1783 mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
1784 mp->mnt_lflag |= MNT_LFORCE;
1785 }
1786 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1787 mp->mnt_lflag |= MNT_LWAIT;
1788 if(withref != 0)
1789 mount_drop(mp, 1);
1790 msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
1791 /*
1792 * The prior unmount attempt has probably succeeded.
1793 * Do not dereference mp here - returning EBUSY is safest.
1794 */
1795 return (EBUSY);
1796 }
1797 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1798 mp->mnt_lflag |= MNT_LUNMOUNT;
1799 mp->mnt_flag &=~ MNT_ASYNC;
1800 /*
1801 * anyone currently in the fast path that
1802 * trips over the cached rootvp will be
1803 * dumped out and forced into the slow path
1804 * to regenerate a new cached value
1805 */
1806 mp->mnt_realrootvp = NULLVP;
1807 mount_unlock(mp);
1808
1809 /*
1810 * taking the name_cache_lock exclusively will
1811 * ensure that everyone is out of the fast path who
1812 * might be trying to use a now stale copy of
1813 * vp->v_mountedhere->mnt_realrootvp
1814 * bumping mount_generation causes the cached values
1815 * to be invalidated
1816 */
1817 name_cache_lock();
1818 mount_generation++;
1819 name_cache_unlock();
1820
1821
1822 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1823 if (withref != 0)
1824 mount_drop(mp, 0);
1825 #if CONFIG_FSE
1826 fsevent_unmount(mp); /* has to come first! */
1827 #endif
1828 error = 0;
1829 if (forcedunmount == 0) {
1830 ubc_umount(mp); /* release cached vnodes */
1831 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1832 error = VFS_SYNC(mp, MNT_WAIT, ctx);
1833 if (error) {
1834 mount_lock(mp);
1835 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1836 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1837 mp->mnt_lflag &= ~MNT_LFORCE;
1838 goto out;
1839 }
1840 }
1841 }
1842
1843 #if CONFIG_TRIGGERS
1844 vfs_nested_trigger_unmounts(mp, flags, ctx);
1845 did_vflush = 1;
1846 #endif
1847 if (forcedunmount)
1848 lflags |= FORCECLOSE;
1849 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1850 if ((forcedunmount == 0) && error) {
1851 mount_lock(mp);
1852 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1853 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1854 mp->mnt_lflag &= ~MNT_LFORCE;
1855 goto out;
1856 }
1857
1858 /* make sure no one is in the mount iterations or lookups */
1859 mount_iterdrain(mp);
1860
1861 error = VFS_UNMOUNT(mp, flags, ctx);
1862 if (error) {
1863 mount_iterreset(mp);
1864 mount_lock(mp);
1865 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1866 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1867 mp->mnt_lflag &= ~MNT_LFORCE;
1868 goto out;
1869 }
1870
1871 /* increment the operations count */
1872 if (!error)
1873 OSAddAtomic(1, &vfs_nummntops);
1874
1875 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1876 /* hold an io reference and drop the usecount before close */
1877 devvp = mp->mnt_devvp;
1878 vnode_getalways(devvp);
1879 vnode_rele(devvp);
1880 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1881 ctx);
1882 vnode_clearmountedon(devvp);
1883 vnode_put(devvp);
1884 }
1885 lck_rw_done(&mp->mnt_rwlock);
1886 mount_list_remove(mp);
1887 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1888
1889 /* mark the mount point hook in the vp but do not drop the ref yet */
1890 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1891 vnode_getwithref(coveredvp);
1892 vnode_lock_spin(coveredvp);
1893
1894 mp->mnt_crossref++;
1895 coveredvp->v_mountedhere = (struct mount *)0;
1896
1897 vnode_unlock(coveredvp);
1898 vnode_put(coveredvp);
1899 }
1900
1901 mount_list_lock();
1902 mp->mnt_vtable->vfc_refcount--;
1903 mount_list_unlock();
1904
1905 cache_purgevfs(mp); /* remove cache entries for this file sys */
1906 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1907 mount_lock(mp);
1908 mp->mnt_lflag |= MNT_LDEAD;
1909
1910 if (mp->mnt_lflag & MNT_LWAIT) {
1911 /*
1912 * do the wakeup here
1913 * in case we block in mount_refdrain
1914 * which will drop the mount lock
1915 * and allow anyone blocked in vfs_busy
1916 * to wakeup and see the LDEAD state
1917 */
1918 mp->mnt_lflag &= ~MNT_LWAIT;
1919 wakeup((caddr_t)mp);
1920 }
1921 mount_refdrain(mp);
1922 out:
1923 if (mp->mnt_lflag & MNT_LWAIT) {
1924 mp->mnt_lflag &= ~MNT_LWAIT;
1925 needwakeup = 1;
1926 }
1927
1928
1929 #if CONFIG_TRIGGERS
1930 /*
1931 * Callback and context are set together under the mount lock, and
1932 * never cleared, so we're safe to examine them here, drop the lock,
1933 * and call out.
1934 */
1935 if (mp->mnt_triggercallback != NULL) {
1936 mount_unlock(mp);
1937 if (error == 0) {
1938 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
1939 } else if (did_vflush) {
1940 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
1941 }
1942 } else {
1943 mount_unlock(mp);
1944 }
1945 #else
1946 mount_unlock(mp);
1947 #endif /* CONFIG_TRIGGERS */
1948
1949 lck_rw_done(&mp->mnt_rwlock);
1950
1951 if (needwakeup)
1952 wakeup((caddr_t)mp);
1953
1954 if (!error) {
1955 if ((coveredvp != NULLVP)) {
1956 vnode_t pvp;
1957
1958 vnode_getwithref(coveredvp);
1959 pvp = vnode_getparent(coveredvp);
1960 vnode_rele(coveredvp);
1961
1962 mount_dropcrossref(mp, coveredvp, 0);
1963 #if CONFIG_TRIGGERS
1964 if (coveredvp->v_resolve)
1965 vnode_trigger_rearm(coveredvp, ctx);
1966 #endif
1967 vnode_put(coveredvp);
1968
1969 if (pvp) {
1970 lock_vnode_and_post(pvp, NOTE_WRITE);
1971 vnode_put(pvp);
1972 }
1973 } else if (mp->mnt_flag & MNT_ROOTFS) {
1974 mount_lock_destroy(mp);
1975 #if CONFIG_MACF
1976 mac_mount_label_destroy(mp);
1977 #endif
1978 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1979 } else
1980 panic("dounmount: no coveredvp");
1981 }
1982 return (error);
1983 }
1984
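/*
 * Drop one crossref held on 'mp' for the covered vnode 'dp'.  Once the
 * last crossref is gone and dp no longer points at mp, the mount
 * structure is finally destroyed and freed.
 */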
1985 void
1986 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
1987 {
1988 vnode_lock(dp);
1989 mp->mnt_crossref--;
1990
1991 if (mp->mnt_crossref < 0)
1992 panic("mount cross refs -ve");
1993
1994 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
1995
1996 if (need_put)
1997 vnode_put_locked(dp);
1998 vnode_unlock(dp);
1999
2000 mount_lock_destroy(mp);
2001 #if CONFIG_MACF
2002 mac_mount_label_destroy(mp);
2003 #endif
2004 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2005 return;
2006 }
2007 if (need_put)
2008 vnode_put_locked(dp);
2009 vnode_unlock(dp);
2010 }
2011
2012
2013 /*
2014 * Sync each mounted filesystem.
2015 */
2016 #if DIAGNOSTIC
2017 int syncprt = 0;
2018 struct ctldebug debug0 = { "syncprt", &syncprt };
2019 #endif
2020
2021 int print_vmpage_stat=0;
2022
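/*
 * Per-mount callback used by sync(2): for each writable mount, temporarily
 * clear MNT_ASYNC so the VFS_SYNC() pass is issued without the async hint,
 * then restore the flag.  A non-NULL arg selects MNT_WAIT, otherwise
 * MNT_NOWAIT is used.
 */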
2023 static int
2024 sync_callback(mount_t mp, void * arg)
2025 {
2026 int asyncflag;
2027
2028 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2029 asyncflag = mp->mnt_flag & MNT_ASYNC;
2030 mp->mnt_flag &= ~MNT_ASYNC;
2031 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current());
2032 if (asyncflag)
2033 mp->mnt_flag |= MNT_ASYNC;
2034 }
2035 return(VFS_RETURNED);
2036 }
2037
2038
2039 #include <kern/clock.h>
2040
2041 clock_sec_t sync_wait_time = 0;
2042
2043 /* ARGSUSED */
2044 int
2045 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2046 {
2047 clock_nsec_t nsecs;
2048
2049 vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
2050
2051 {
2052 static fsid_t fsid = { { 0, 0 } };
2053
2054 clock_get_calendar_microtime(&sync_wait_time, &nsecs);
2055 vfs_event_signal(&fsid, VQ_SYNCEVENT, (intptr_t)NULL);
2056 wakeup((caddr_t)&sync_wait_time);
2057 }
2058
2059 {
2060 if(print_vmpage_stat) {
2061 vm_countdirtypages();
2062 }
2063 }
2064 #if DIAGNOSTIC
2065 if (syncprt)
2066 vfs_bufstats();
2067 #endif /* DIAGNOSTIC */
2068 return (0);
2069 }
2070
2071 /*
2072 * Change filesystem quotas.
2073 */
2074 #if QUOTA
2075 static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval);
2076
2077 int
2078 quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval)
2079 {
2080 boolean_t funnel_state;
2081 int error;
2082
2083 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2084 error = quotactl_funneled(p, uap, retval);
2085 thread_funnel_set(kernel_flock, funnel_state);
2086 return(error);
2087 }
2088
2089 static int
2090 quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2091 {
2092 struct mount *mp;
2093 int error, quota_cmd, quota_status;
2094 caddr_t datap;
2095 size_t fnamelen;
2096 struct nameidata nd;
2097 vfs_context_t ctx = vfs_context_current();
2098 struct dqblk my_dqblk;
2099
2100 AUDIT_ARG(uid, uap->uid);
2101 AUDIT_ARG(cmd, uap->cmd);
2102 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2103 uap->path, ctx);
2104 error = namei(&nd);
2105 if (error)
2106 return (error);
2107 mp = nd.ni_vp->v_mount;
2108 vnode_put(nd.ni_vp);
2109 nameidone(&nd);
2110
2111 /* copyin any data we will need for downstream code */
2112 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2113
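/*
 * The command word packs the quota operation in the bits above
 * SUBCMDSHIFT and the quota type (user vs. group) in the low bits;
 * userspace conventionally builds it with something like
 * QCMD(Q_GETQUOTA, USRQUOTA) from <sys/quota.h>.
 */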
2114 switch (quota_cmd) {
2115 case Q_QUOTAON:
2116 /* uap->arg specifies a file from which to take the quotas */
2117 fnamelen = MAXPATHLEN;
2118 datap = kalloc(MAXPATHLEN);
2119 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2120 break;
2121 case Q_GETQUOTA:
2122 /* uap->arg is a pointer to a dqblk structure. */
2123 datap = (caddr_t) &my_dqblk;
2124 break;
2125 case Q_SETQUOTA:
2126 case Q_SETUSE:
2127 /* uap->arg is a pointer to a dqblk structure. */
2128 datap = (caddr_t) &my_dqblk;
2129 if (proc_is64bit(p)) {
2130 struct user_dqblk my_dqblk64;
2131 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2132 if (error == 0) {
2133 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2134 }
2135 }
2136 else {
2137 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2138 }
2139 break;
2140 case Q_QUOTASTAT:
2141 /* uap->arg is a pointer to an integer */
2142 datap = (caddr_t) &quota_status;
2143 break;
2144 default:
2145 datap = NULL;
2146 break;
2147 } /* switch */
2148
2149 if (error == 0) {
2150 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2151 }
2152
2153 switch (quota_cmd) {
2154 case Q_QUOTAON:
2155 if (datap != NULL)
2156 kfree(datap, MAXPATHLEN);
2157 break;
2158 case Q_GETQUOTA:
2159 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2160 if (error == 0) {
2161 if (proc_is64bit(p)) {
2162 struct user_dqblk my_dqblk64;
2163 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2164 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2165 }
2166 else {
2167 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2168 }
2169 }
2170 break;
2171 case Q_QUOTASTAT:
2172 /* uap->arg is a pointer to an integer */
2173 if (error == 0) {
2174 error = copyout(datap, uap->arg, sizeof(quota_status));
2175 }
2176 break;
2177 default:
2178 break;
2179 } /* switch */
2180
2181 return (error);
2182 }
2183 #else
2184 int
2185 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2186 {
2187 return (EOPNOTSUPP);
2188 }
2189 #endif /* QUOTA */
2190
2191 /*
2192 * Get filesystem statistics.
2193 *
2194 * Returns: 0 Success
2195 * namei:???
2196 * vfs_update_vfsstat:???
2197 * munge_statfs:EFAULT
2198 */
2199 /* ARGSUSED */
2200 int
2201 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2202 {
2203 struct mount *mp;
2204 struct vfsstatfs *sp;
2205 int error;
2206 struct nameidata nd;
2207 vfs_context_t ctx = vfs_context_current();
2208 vnode_t vp;
2209
2210 NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2211 UIO_USERSPACE, uap->path, ctx);
2212 error = namei(&nd);
2213 if (error)
2214 return (error);
2215 vp = nd.ni_vp;
2216 mp = vp->v_mount;
2217 sp = &mp->mnt_vfsstat;
2218 nameidone(&nd);
2219
2220 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2221 vnode_put(vp);
2222 if (error != 0)
2223 return (error);
2224
2225 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2226 return (error);
2227 }
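/*
 * Typical userspace use of the call above, shown as an illustrative
 * sketch (error handling elided):
 *
 *	#include <sys/param.h>
 *	#include <sys/mount.h>
 *	#include <stdio.h>
 *
 *	struct statfs sfs;
 *	if (statfs("/", &sfs) == 0)
 *		printf("%s on %s: %llu blocks free\n", sfs.f_mntfromname,
 *		    sfs.f_mntonname, (unsigned long long)sfs.f_bfree);
 */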
2228
2229 /*
2230 * Get filesystem statistics.
2231 */
2232 /* ARGSUSED */
2233 int
2234 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2235 {
2236 vnode_t vp;
2237 struct mount *mp;
2238 struct vfsstatfs *sp;
2239 int error;
2240
2241 AUDIT_ARG(fd, uap->fd);
2242
2243 if ( (error = file_vnode(uap->fd, &vp)) )
2244 return (error);
2245
2246 error = vnode_getwithref(vp);
2247 if (error) {
2248 file_drop(uap->fd);
2249 return (error);
2250 }
2251
2252 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2253
2254 mp = vp->v_mount;
2255 if (!mp) {
2256 error = EBADF;
2257 goto out;
2258 }
2259 sp = &mp->mnt_vfsstat;
2260 if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
2261 goto out;
2262 }
2263
2264 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2265
2266 out:
2267 file_drop(uap->fd);
2268 vnode_put(vp);
2269
2270 return (error);
2271 }
2272
2273 /*
2274 * Common routine to handle copying of statfs64 data to user space
2275 */
2276 static int
2277 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2278 {
2279 int error;
2280 struct statfs64 sfs;
2281
2282 bzero(&sfs, sizeof(sfs));
2283
2284 sfs.f_bsize = sfsp->f_bsize;
2285 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2286 sfs.f_blocks = sfsp->f_blocks;
2287 sfs.f_bfree = sfsp->f_bfree;
2288 sfs.f_bavail = sfsp->f_bavail;
2289 sfs.f_files = sfsp->f_files;
2290 sfs.f_ffree = sfsp->f_ffree;
2291 sfs.f_fsid = sfsp->f_fsid;
2292 sfs.f_owner = sfsp->f_owner;
2293 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2294 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2295 sfs.f_fssubtype = sfsp->f_fssubtype;
2296 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2297 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2298 } else {
2299 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2300 }
2301 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2302 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2303
2304 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2305
2306 return(error);
2307 }
2308
2309 /*
2310 * Get file system statistics in 64-bit mode
2311 */
2312 int
2313 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2314 {
2315 struct mount *mp;
2316 struct vfsstatfs *sp;
2317 int error;
2318 struct nameidata nd;
2319 vfs_context_t ctxp = vfs_context_current();
2320 vnode_t vp;
2321
2322 NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2323 UIO_USERSPACE, uap->path, ctxp);
2324 error = namei(&nd);
2325 if (error)
2326 return (error);
2327 vp = nd.ni_vp;
2328 mp = vp->v_mount;
2329 sp = &mp->mnt_vfsstat;
2330 nameidone(&nd);
2331
2332 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2333 vnode_put(vp);
2334 if (error != 0)
2335 return (error);
2336
2337 error = statfs64_common(mp, sp, uap->buf);
2338
2339 return (error);
2340 }
2341
2342 /*
2343 * Get file system statistics in 64-bit mode
2344 */
2345 int
2346 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2347 {
2348 struct vnode *vp;
2349 struct mount *mp;
2350 struct vfsstatfs *sp;
2351 int error;
2352
2353 AUDIT_ARG(fd, uap->fd);
2354
2355 if ( (error = file_vnode(uap->fd, &vp)) )
2356 return (error);
2357
2358 error = vnode_getwithref(vp);
2359 if (error) {
2360 file_drop(uap->fd);
2361 return (error);
2362 }
2363
2364 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2365
2366 mp = vp->v_mount;
2367 if (!mp) {
2368 error = EBADF;
2369 goto out;
2370 }
2371 sp = &mp->mnt_vfsstat;
2372 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2373 goto out;
2374 }
2375
2376 error = statfs64_common(mp, sp, uap->buf);
2377
2378 out:
2379 file_drop(uap->fd);
2380 vnode_put(vp);
2381
2382 return (error);
2383 }
2384
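/*
 * Shared iteration state for the getfsstat()/getfsstat64() callbacks:
 * sfsp walks the user buffer, mp (when non-NULL) walks the per-mount MAC
 * label destinations, count/maxcount track mounts visited vs. entries
 * that fit in the buffer, and error records the first failure seen.
 */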
2385 struct getfsstat_struct {
2386 user_addr_t sfsp;
2387 user_addr_t *mp;
2388 int count;
2389 int maxcount;
2390 int flags;
2391 int error;
2392 };
2393
2394
2395 static int
2396 getfsstat_callback(mount_t mp, void * arg)
2397 {
2398
2399 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2400 struct vfsstatfs *sp;
2401 int error, my_size;
2402 vfs_context_t ctx = vfs_context_current();
2403
2404 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2405 sp = &mp->mnt_vfsstat;
2406 /*
2407 * If MNT_NOWAIT is specified, do not refresh the
2408 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2409 */
2410 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2411 (error = vfs_update_vfsstat(mp, ctx,
2412 VFS_USER_EVENT))) {
2413 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2414 return(VFS_RETURNED);
2415 }
2416
2417 /*
2418 * Need to handle LP64 version of struct statfs
2419 */
2420 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2421 if (error) {
2422 fstp->error = error;
2423 return(VFS_RETURNED_DONE);
2424 }
2425 fstp->sfsp += my_size;
2426
2427 if (fstp->mp) {
2428 error = mac_mount_label_get(mp, *fstp->mp);
2429 if (error) {
2430 fstp->error = error;
2431 return(VFS_RETURNED_DONE);
2432 }
2433 fstp->mp++;
2434 }
2435 }
2436 fstp->count++;
2437 return(VFS_RETURNED);
2438 }
2439
2440 /*
2441 * Get statistics on all filesystems.
2442 */
2443 int
2444 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2445 {
2446 struct __mac_getfsstat_args muap;
2447
2448 muap.buf = uap->buf;
2449 muap.bufsize = uap->bufsize;
2450 muap.mac = USER_ADDR_NULL;
2451 muap.macsize = 0;
2452 muap.flags = uap->flags;
2453
2454 return (__mac_getfsstat(p, &muap, retval));
2455 }
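/*
 * The usual two-pass pattern from userspace, as an illustrative sketch
 * (headers and error checks elided): a NULL buffer returns the number of
 * mounted filesystems, which sizes the real request.
 *
 *	int n = getfsstat(NULL, 0, MNT_NOWAIT);
 *	struct statfs *buf = malloc(n * sizeof(*buf));
 *	n = getfsstat(buf, n * sizeof(*buf), MNT_NOWAIT);
 */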
2456
2457 /*
2458 * __mac_getfsstat: Get MAC-related file system statistics
2459 *
2460 * Parameters: p (ignored)
2461 * uap User argument descriptor (see below)
2462 * retval Count of file system statistics (N stats)
2463 *
2464 * Indirect: uap->bufsize Buffer size
2465 * uap->macsize MAC info size
2466 * uap->buf Buffer where information will be returned
2467 * uap->mac MAC info
2468 * uap->flags File system flags
2469 *
2470 *
2471 * Returns: 0 Success
2472 * !0 Not success
2473 *
2474 */
2475 int
2476 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2477 {
2478 user_addr_t sfsp;
2479 user_addr_t *mp;
2480 size_t count, maxcount, bufsize, macsize;
2481 struct getfsstat_struct fst;
2482
2483 bufsize = (size_t) uap->bufsize;
2484 macsize = (size_t) uap->macsize;
2485
2486 if (IS_64BIT_PROCESS(p)) {
2487 maxcount = bufsize / sizeof(struct user64_statfs);
2488 }
2489 else {
2490 maxcount = bufsize / sizeof(struct user32_statfs);
2491 }
2492 sfsp = uap->buf;
2493 count = 0;
2494
2495 mp = NULL;
2496
2497 #if CONFIG_MACF
2498 if (uap->mac != USER_ADDR_NULL) {
2499 u_int32_t *mp0;
2500 int error;
2501 unsigned int i;
2502
2503 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2504 if (count != maxcount)
2505 return (EINVAL);
2506
2507 /* Copy in the array */
2508 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2509 if (mp0 == NULL) {
2510 return (ENOMEM);
2511 }
2512
2513 error = copyin(uap->mac, mp0, macsize);
2514 if (error) {
2515 FREE(mp0, M_MACTEMP);
2516 return (error);
2517 }
2518
2519 /* Normalize to an array of user_addr_t */
2520 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2521 if (mp == NULL) {
2522 FREE(mp0, M_MACTEMP);
2523 return (ENOMEM);
2524 }
2525
2526 for (i = 0; i < count; i++) {
2527 if (IS_64BIT_PROCESS(p))
2528 mp[i] = ((user_addr_t *)mp0)[i];
2529 else
2530 mp[i] = (user_addr_t)mp0[i];
2531 }
2532 FREE(mp0, M_MACTEMP);
2533 }
2534 #endif
2535
2536
2537 fst.sfsp = sfsp;
2538 fst.mp = mp;
2539 fst.flags = uap->flags;
2540 fst.count = 0;
2541 fst.error = 0;
2542 fst.maxcount = maxcount;
2543
2544
2545 vfs_iterate(0, getfsstat_callback, &fst);
2546
2547 if (mp)
2548 FREE(mp, M_MACTEMP);
2549
2550 if (fst.error ) {
2551 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2552 return(fst.error);
2553 }
2554
2555 if (fst.sfsp && fst.count > fst.maxcount)
2556 *retval = fst.maxcount;
2557 else
2558 *retval = fst.count;
2559 return (0);
2560 }
2561
2562 static int
2563 getfsstat64_callback(mount_t mp, void * arg)
2564 {
2565 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2566 struct vfsstatfs *sp;
2567 int error;
2568
2569 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2570 sp = &mp->mnt_vfsstat;
2571 /*
2572 * If MNT_NOWAIT is specified, do not refresh the fsstat
2573 * cache. MNT_WAIT overrides MNT_NOWAIT.
2574 *
2575 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2576 * getfsstat, since the constants are out of the same
2577 * namespace.
2578 */
2579 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2580 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2581 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2582 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2583 return(VFS_RETURNED);
2584 }
2585
2586 error = statfs64_common(mp, sp, fstp->sfsp);
2587 if (error) {
2588 fstp->error = error;
2589 return(VFS_RETURNED_DONE);
2590 }
2591 fstp->sfsp += sizeof(struct statfs64);
2592 }
2593 fstp->count++;
2594 return(VFS_RETURNED);
2595 }
2596
2597 /*
2598 * Get statistics on all file systems in 64 bit mode.
2599 */
2600 int
2601 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2602 {
2603 user_addr_t sfsp;
2604 int count, maxcount;
2605 struct getfsstat_struct fst;
2606
2607 maxcount = uap->bufsize / sizeof(struct statfs64);
2608
2609 sfsp = uap->buf;
2610 count = 0;
2611
2612 fst.sfsp = sfsp;
2613 fst.flags = uap->flags;
2614 fst.count = 0;
2615 fst.error = 0;
2616 fst.maxcount = maxcount;
2617
2618 vfs_iterate(0, getfsstat64_callback, &fst);
2619
2620 if (fst.error ) {
2621 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2622 return(fst.error);
2623 }
2624
2625 if (fst.sfsp && fst.count > fst.maxcount)
2626 *retval = fst.maxcount;
2627 else
2628 *retval = fst.count;
2629
2630 return (0);
2631 }
2632
2633 /*
2634 * Change current working directory to a given file descriptor.
2635 */
2636 /* ARGSUSED */
2637 static int
2638 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
2639 {
2640 struct filedesc *fdp = p->p_fd;
2641 vnode_t vp;
2642 vnode_t tdp;
2643 vnode_t tvp;
2644 struct mount *mp;
2645 int error;
2646 vfs_context_t ctx = vfs_context_current();
2647
2648 AUDIT_ARG(fd, uap->fd);
2649 if (per_thread && uap->fd == -1) {
2650 /*
2651 * Switching back from per-thread to per-process CWD; verify we
2652 * in fact have one before proceeding. The only success case
2653 * for this code path is to return 0 preemptively after zapping
2654 * the thread structure contents.
2655 */
2656 thread_t th = vfs_context_thread(ctx);
2657 if (th) {
2658 uthread_t uth = get_bsdthread_info(th);
2659 tvp = uth->uu_cdir;
2660 uth->uu_cdir = NULLVP;
2661 if (tvp != NULLVP) {
2662 vnode_rele(tvp);
2663 return (0);
2664 }
2665 }
2666 return (EBADF);
2667 }
2668
2669 if ( (error = file_vnode(uap->fd, &vp)) )
2670 return(error);
2671 if ( (error = vnode_getwithref(vp)) ) {
2672 file_drop(uap->fd);
2673 return(error);
2674 }
2675
2676 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2677
2678 if (vp->v_type != VDIR) {
2679 error = ENOTDIR;
2680 goto out;
2681 }
2682
2683 #if CONFIG_MACF
2684 error = mac_vnode_check_chdir(ctx, vp);
2685 if (error)
2686 goto out;
2687 #endif
2688 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2689 if (error)
2690 goto out;
2691
2692 while (!error && (mp = vp->v_mountedhere) != NULL) {
2693 if (vfs_busy(mp, LK_NOWAIT)) {
2694 error = EACCES;
2695 goto out;
2696 }
2697 error = VFS_ROOT(mp, &tdp, ctx);
2698 vfs_unbusy(mp);
2699 if (error)
2700 break;
2701 vnode_put(vp);
2702 vp = tdp;
2703 }
2704 if (error)
2705 goto out;
2706 if ( (error = vnode_ref(vp)) )
2707 goto out;
2708 vnode_put(vp);
2709
2710 if (per_thread) {
2711 thread_t th = vfs_context_thread(ctx);
2712 if (th) {
2713 uthread_t uth = get_bsdthread_info(th);
2714 tvp = uth->uu_cdir;
2715 uth->uu_cdir = vp;
2716 OSBitOrAtomic(P_THCWD, &p->p_flag);
2717 } else {
2718 vnode_rele(vp);
2719 return (ENOENT);
2720 }
2721 } else {
2722 proc_fdlock(p);
2723 tvp = fdp->fd_cdir;
2724 fdp->fd_cdir = vp;
2725 proc_fdunlock(p);
2726 }
2727
2728 if (tvp)
2729 vnode_rele(tvp);
2730 file_drop(uap->fd);
2731
2732 return (0);
2733 out:
2734 vnode_put(vp);
2735 file_drop(uap->fd);
2736
2737 return(error);
2738 }
2739
2740 int
2741 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2742 {
2743 return common_fchdir(p, uap, 0);
2744 }
2745
2746 int
2747 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2748 {
2749 return common_fchdir(p, (void *)uap, 1);
2750 }
2751
2752 /*
2753 * Change current working directory (".").
2754 *
2755 * Returns: 0 Success
2756 * change_dir:ENOTDIR
2757 * change_dir:???
2758 * vnode_ref:ENOENT No such file or directory
2759 */
2760 /* ARGSUSED */
2761 static int
2762 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
2763 {
2764 struct filedesc *fdp = p->p_fd;
2765 int error;
2766 struct nameidata nd;
2767 vnode_t tvp;
2768 vfs_context_t ctx = vfs_context_current();
2769
2770 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2771 UIO_USERSPACE, uap->path, ctx);
2772 error = change_dir(&nd, ctx);
2773 if (error)
2774 return (error);
2775 if ( (error = vnode_ref(nd.ni_vp)) ) {
2776 vnode_put(nd.ni_vp);
2777 return (error);
2778 }
2779 /*
2780 * drop the iocount we picked up in change_dir
2781 */
2782 vnode_put(nd.ni_vp);
2783
2784 if (per_thread) {
2785 thread_t th = vfs_context_thread(ctx);
2786 if (th) {
2787 uthread_t uth = get_bsdthread_info(th);
2788 tvp = uth->uu_cdir;
2789 uth->uu_cdir = nd.ni_vp;
2790 OSBitOrAtomic(P_THCWD, &p->p_flag);
2791 } else {
2792 vnode_rele(nd.ni_vp);
2793 return (ENOENT);
2794 }
2795 } else {
2796 proc_fdlock(p);
2797 tvp = fdp->fd_cdir;
2798 fdp->fd_cdir = nd.ni_vp;
2799 proc_fdunlock(p);
2800 }
2801
2802 if (tvp)
2803 vnode_rele(tvp);
2804
2805 return (0);
2806 }
2807
2808
2809 /*
2810 * chdir
2811 *
2812 * Change current working directory (".") for the entire process
2813 *
2814 * Parameters: p Process requesting the call
2815 * uap User argument descriptor (see below)
2816 * retval (ignored)
2817 *
2818 * Indirect parameters: uap->path Directory path
2819 *
2820 * Returns: 0 Success
2821 * common_chdir: ENOTDIR
2822 * common_chdir: ENOENT No such file or directory
2823 * common_chdir: ???
2824 *
2825 */
2826 int
2827 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2828 {
2829 return common_chdir(p, (void *)uap, 0);
2830 }
2831
2832 /*
2833 * __pthread_chdir
2834 *
2835 * Change current working directory (".") for a single thread
2836 *
2837 * Parameters: p Process requesting the call
2838 * uap User argument descriptor (see below)
2839 * retval (ignored)
2840 *
2841 * Indirect parameters: uap->path Directory path
2842 *
2843 * Returns: 0 Success
2844 * common_chdir: ENOTDIR
2845 * common_chdir: ENOENT No such file or directory
2846 * common_chdir: ???
2847 *
2848 */
2849 int
2850 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2851 {
2852 return common_chdir(p, (void *)uap, 1);
2853 }
2854
2855
2856 /*
2857 * Change notion of root (``/'') directory.
2858 */
2859 /* ARGSUSED */
2860 int
2861 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
2862 {
2863 struct filedesc *fdp = p->p_fd;
2864 int error;
2865 struct nameidata nd;
2866 vnode_t tvp;
2867 vfs_context_t ctx = vfs_context_current();
2868
2869 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
2870 return (error);
2871
2872 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2873 UIO_USERSPACE, uap->path, ctx);
2874 error = change_dir(&nd, ctx);
2875 if (error)
2876 return (error);
2877
2878 #if CONFIG_MACF
2879 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
2880 &nd.ni_cnd);
2881 if (error) {
2882 vnode_put(nd.ni_vp);
2883 return (error);
2884 }
2885 #endif
2886
2887 if ( (error = vnode_ref(nd.ni_vp)) ) {
2888 vnode_put(nd.ni_vp);
2889 return (error);
2890 }
2891 vnode_put(nd.ni_vp);
2892
2893 proc_fdlock(p);
2894 tvp = fdp->fd_rdir;
2895 fdp->fd_rdir = nd.ni_vp;
2896 fdp->fd_flags |= FD_CHROOT;
2897 proc_fdunlock(p);
2898
2899 if (tvp != NULL)
2900 vnode_rele(tvp);
2901
2902 return (0);
2903 }
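/*
 * Note that chroot() changes only the root directory, not the current
 * working directory, so callers conventionally pair it with chdir(),
 * roughly:
 *
 *	chdir("/new/root");
 *	chroot("/new/root");
 */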
2904
2905 /*
2906 * Common routine for chroot and chdir.
2907 *
2908 * Returns: 0 Success
2909 * ENOTDIR Not a directory
2910 * namei:??? [anything namei can return]
2911 * vnode_authorize:??? [anything vnode_authorize can return]
2912 */
2913 static int
2914 change_dir(struct nameidata *ndp, vfs_context_t ctx)
2915 {
2916 vnode_t vp;
2917 int error;
2918
2919 if ((error = namei(ndp)))
2920 return (error);
2921 nameidone(ndp);
2922 vp = ndp->ni_vp;
2923
2924 if (vp->v_type != VDIR) {
2925 vnode_put(vp);
2926 return (ENOTDIR);
2927 }
2928
2929 #if CONFIG_MACF
2930 error = mac_vnode_check_chdir(ctx, vp);
2931 if (error) {
2932 vnode_put(vp);
2933 return (error);
2934 }
2935 #endif
2936
2937 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2938 if (error) {
2939 vnode_put(vp);
2940 return (error);
2941 }
2942
2943 return (error);
2944 }
2945
2946 /*
2947 * Check permissions, allocate an open file structure,
2948 * and call the device open routine if any.
2949 *
2950 * Returns: 0 Success
2951 * EINVAL
2952 * EINTR
2953 * falloc:ENFILE
2954 * falloc:EMFILE
2955 * falloc:ENOMEM
2956 * vn_open_auth:???
2957 * dupfdopen:???
2958 * VNOP_ADVLOCK:???
2959 * vnode_setsize:???
2960 *
2961 * XXX Need to implement uid, gid
2962 */
2963 int
2964 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, int32_t *retval)
2965 {
2966 proc_t p = vfs_context_proc(ctx);
2967 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2968 struct filedesc *fdp = p->p_fd;
2969 struct fileproc *fp;
2970 vnode_t vp;
2971 int flags, oflags;
2972 struct fileproc *nfp;
2973 int type, indx, error;
2974 struct flock lf;
2975 int no_controlling_tty = 0;
2976 int deny_controlling_tty = 0;
2977 struct session *sessp = SESSION_NULL;
2978 struct vfs_context context = *vfs_context_current(); /* local copy */
2979
2980 oflags = uflags;
2981
2982 if ((oflags & O_ACCMODE) == O_ACCMODE)
2983 return(EINVAL);
2984 flags = FFLAGS(uflags);
2985
2986 AUDIT_ARG(fflags, oflags);
2987 AUDIT_ARG(mode, vap->va_mode);
2988
2989 if ( (error = falloc(p, &nfp, &indx, ctx)) ) {
2990 return (error);
2991 }
2992 fp = nfp;
2993 uu->uu_dupfd = -indx - 1;
2994
2995 if (!(p->p_flag & P_CONTROLT)) {
2996 sessp = proc_session(p);
2997 no_controlling_tty = 1;
2998 /*
2999 * If conditions would warrant getting a controlling tty if
3000 * the device being opened is a tty (see ttyopen in tty.c),
3001 * but the open flags deny it, set a flag in the session to
3002 * prevent it.
3003 */
3004 if (SESS_LEADER(p, sessp) &&
3005 sessp->s_ttyvp == NULL &&
3006 (flags & O_NOCTTY)) {
3007 session_lock(sessp);
3008 sessp->s_flags |= S_NOCTTY;
3009 session_unlock(sessp);
3010 deny_controlling_tty = 1;
3011 }
3012 }
3013
3014 if ((error = vn_open_auth(ndp, &flags, vap))) {
3015 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
3016 if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) {
3017 fp_drop(p, indx, NULL, 0);
3018 *retval = indx;
3019 if (deny_controlling_tty) {
3020 session_lock(sessp);
3021 sessp->s_flags &= ~S_NOCTTY;
3022 session_unlock(sessp);
3023 }
3024 if (sessp != SESSION_NULL)
3025 session_rele(sessp);
3026 return (0);
3027 }
3028 }
3029 if (error == ERESTART)
3030 error = EINTR;
3031 fp_free(p, indx, fp);
3032
3033 if (deny_controlling_tty) {
3034 session_lock(sessp);
3035 sessp->s_flags &= ~S_NOCTTY;
3036 session_unlock(sessp);
3037 }
3038 if (sessp != SESSION_NULL)
3039 session_rele(sessp);
3040 return (error);
3041 }
3042 uu->uu_dupfd = 0;
3043 vp = ndp->ni_vp;
3044
3045 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
3046 fp->f_fglob->fg_type = DTYPE_VNODE;
3047 fp->f_fglob->fg_ops = &vnops;
3048 fp->f_fglob->fg_data = (caddr_t)vp;
3049
3050 #if CONFIG_PROTECT
3051 if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
3052 if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
3053 fp->f_fglob->fg_flag |= FENCRYPTED;
3054 }
3055 }
3056 #endif
3057
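/*
 * O_EXLOCK/O_SHLOCK request a whole-file flock(2)-style advisory lock as
 * part of the open itself; without FNONBLOCK the open blocks until the
 * lock is granted.  For example, a userspace
 * open(path, O_RDWR | O_EXLOCK | O_NONBLOCK) typically fails with
 * EWOULDBLOCK while another process holds an exclusive lock.
 */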
3058 if (flags & (O_EXLOCK | O_SHLOCK)) {
3059 lf.l_whence = SEEK_SET;
3060 lf.l_start = 0;
3061 lf.l_len = 0;
3062 if (flags & O_EXLOCK)
3063 lf.l_type = F_WRLCK;
3064 else
3065 lf.l_type = F_RDLCK;
3066 type = F_FLOCK;
3067 if ((flags & FNONBLOCK) == 0)
3068 type |= F_WAIT;
3069 #if CONFIG_MACF
3070 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3071 F_SETLK, &lf);
3072 if (error)
3073 goto bad;
3074 #endif
3075 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx)))
3076 goto bad;
3077 fp->f_fglob->fg_flag |= FHASLOCK;
3078 }
3079
3080 /* try to truncate by setting the size attribute */
3081 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3082 goto bad;
3083
3084 /*
3085 * If the open flags denied the acquisition of a controlling tty,
3086 * clear the flag in the session structure that prevented the lower
3087 * level code from assigning one.
3088 */
3089 if (deny_controlling_tty) {
3090 session_lock(sessp);
3091 sessp->s_flags &= ~S_NOCTTY;
3092 session_unlock(sessp);
3093 }
3094
3095 /*
3096 * If a controlling tty was set by the tty line discipline, then we
3097 * want to set the vp of the tty into the session structure. We have
3098 * a race here because we can't get to the vp for the tp in ttyopen,
3099 * because it's not passed as a parameter in the open path.
3100 */
3101 if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
3102 vnode_t ttyvp;
3103
3104 /*
3105 * We already have a ref from vn_open_auth(), so we can demand another reference.
3106 */
3107 error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
3108 if (error != 0) {
3109 panic("vnode_ref_ext() with VNODE_REF_FORCE failed?!");
3110 }
3111
3112 session_lock(sessp);
3113 ttyvp = sessp->s_ttyvp;
3114 sessp->s_ttyvp = vp;
3115 sessp->s_ttyvid = vnode_vid(vp);
3116 session_unlock(sessp);
3117 if (ttyvp != NULLVP)
3118 vnode_rele(ttyvp);
3119 }
3120
3121 vnode_put(vp);
3122
3123 proc_fdlock(p);
3124 if (flags & O_CLOEXEC)
3125 *fdflags(p, indx) |= UF_EXCLOSE;
3126 procfdtbl_releasefd(p, indx, NULL);
3127 fp_drop(p, indx, fp, 1);
3128 proc_fdunlock(p);
3129
3130 *retval = indx;
3131
3132 if (sessp != SESSION_NULL)
3133 session_rele(sessp);
3134 return (0);
3135 bad:
3136 if (deny_controlling_tty) {
3137 session_lock(sessp);
3138 sessp->s_flags &= ~S_NOCTTY;
3139 session_unlock(sessp);
3140 }
3141 if (sessp != SESSION_NULL)
3142 session_rele(sessp);
3143
3144 /* Modify local copy (to not damage thread copy) */
3145 context.vc_ucred = fp->f_fglob->fg_cred;
3146
3147 vn_close(vp, fp->f_fglob->fg_flag, &context);
3148 vnode_put(vp);
3149 fp_free(p, indx, fp);
3150
3151 return (error);
3152
3153 }
3154
3155 /*
3156 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3157 *
3158 * Parameters: p Process requesting the open
3159 * uap User argument descriptor (see below)
3160 * retval Pointer to an area to receive the
3161 * return value from the system call
3162 *
3163 * Indirect: uap->path Path to open (same as 'open')
3164 * uap->flags Flags to open (same as 'open')
3165 * uap->uid UID to set, if creating
3166 * uap->gid GID to set, if creating
3167 * uap->mode File mode, if creating (same as 'open')
3168 * uap->xsecurity ACL to set, if creating
3169 *
3170 * Returns: 0 Success
3171 * !0 errno value
3172 *
3173 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3174 *
3175 * XXX: We should enumerate the possible errno values here, and where
3176 * in the code they originated.
3177 */
3178 int
3179 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3180 {
3181 struct filedesc *fdp = p->p_fd;
3182 int ciferror;
3183 kauth_filesec_t xsecdst;
3184 struct vnode_attr va;
3185 struct nameidata nd;
3186 int cmode;
3187
3188 AUDIT_ARG(owner, uap->uid, uap->gid);
3189
3190 xsecdst = NULL;
3191 if ((uap->xsecurity != USER_ADDR_NULL) &&
3192 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3193 return ciferror;
3194
3195 VATTR_INIT(&va);
3196 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3197 VATTR_SET(&va, va_mode, cmode);
3198 if (uap->uid != KAUTH_UID_NONE)
3199 VATTR_SET(&va, va_uid, uap->uid);
3200 if (uap->gid != KAUTH_GID_NONE)
3201 VATTR_SET(&va, va_gid, uap->gid);
3202 if (xsecdst != NULL)
3203 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3204
3205 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3206 uap->path, vfs_context_current());
3207
3208 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
3209 if (xsecdst != NULL)
3210 kauth_filesec_free(xsecdst);
3211
3212 return ciferror;
3213 }
3214
3215 /*
3216 * Data-protected variant of open(2): the protection class is applied atomically as part of the open/create.
3217 *
3218 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3219 */
3220 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3221 int flags = uap->flags;
3222 int class = uap->class;
3223 int dpflags = uap->dpflags;
3224
3225 /*
3226 * Follow the same path as normal open(2)
3227 * Look up the item if it exists, and acquire the vnode.
3228 */
3229 struct filedesc *fdp = p->p_fd;
3230 struct vnode_attr va;
3231 struct nameidata nd;
3232 int cmode;
3233 int error;
3234
3235 VATTR_INIT(&va);
3236 /* Mask off all but regular access permissions */
3237 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3238 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3239
3240 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3241 uap->path, vfs_context_current());
3242
3243 /*
3244 * Initialize the extra fields in vnode_attr to pass down our
3245 * extra fields.
3246 * 1. target cprotect class.
3247 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3248 */
3249 if (flags & O_CREAT) {
3250 VATTR_SET(&va, va_dataprotect_class, class);
3251 }
3252
3253 if (dpflags & O_DP_GETRAWENCRYPTED) {
3254 if ( flags & (O_RDWR | O_WRONLY)) {
3255 /* Not allowed to write raw encrypted bytes */
3256 return EINVAL;
3257 }
3258 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3259 }
3260
3261 error = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
3262
3263 return error;
3264 }
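/*
 * Illustrative call shape for the syscall above (sketch only): the class
 * argument is the content-protection class understood by the underlying
 * filesystem and is only consulted with O_CREAT, and O_DP_GETRAWENCRYPTED
 * requires a read-only open, e.g.
 *
 *	fd = open_dprotected_np("/path/to/file", O_RDONLY, 0,
 *	    O_DP_GETRAWENCRYPTED, 0);
 */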
3265
3266
3267 int
3268 open(proc_t p, struct open_args *uap, int32_t *retval)
3269 {
3270 __pthread_testcancel(1);
3271 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3272 }
3273
3274 int
3275 open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
3276 {
3277 struct filedesc *fdp = p->p_fd;
3278 struct vnode_attr va;
3279 struct nameidata nd;
3280 int cmode;
3281
3282 VATTR_INIT(&va);
3283 /* Mask off all but regular access permissions */
3284 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3285 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3286
3287 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3288 uap->path, vfs_context_current());
3289
3290 return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
3291 }
3292
3293
3294 /*
3295 * Create a special file.
3296 */
3297 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3298
3299 int
3300 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3301 {
3302 struct vnode_attr va;
3303 vfs_context_t ctx = vfs_context_current();
3304 int error;
3305 struct nameidata nd;
3306 vnode_t vp, dvp;
3307
3308 VATTR_INIT(&va);
3309 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3310 VATTR_SET(&va, va_rdev, uap->dev);
3311
3312 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3313 if ((uap->mode & S_IFMT) == S_IFIFO)
3314 return(mkfifo1(ctx, uap->path, &va));
3315
3316 AUDIT_ARG(mode, uap->mode);
3317 AUDIT_ARG(value32, uap->dev);
3318
3319 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
3320 return (error);
3321 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
3322 UIO_USERSPACE, uap->path, ctx);
3323 error = namei(&nd);
3324 if (error)
3325 return (error);
3326 dvp = nd.ni_dvp;
3327 vp = nd.ni_vp;
3328
3329 if (vp != NULL) {
3330 error = EEXIST;
3331 goto out;
3332 }
3333
3334 switch (uap->mode & S_IFMT) {
3335 case S_IFMT: /* used by badsect to flag bad sectors */
3336 VATTR_SET(&va, va_type, VBAD);
3337 break;
3338 case S_IFCHR:
3339 VATTR_SET(&va, va_type, VCHR);
3340 break;
3341 case S_IFBLK:
3342 VATTR_SET(&va, va_type, VBLK);
3343 break;
3344 default:
3345 error = EINVAL;
3346 goto out;
3347 }
3348
3349 #if CONFIG_MACF
3350 error = mac_vnode_check_create(ctx,
3351 nd.ni_dvp, &nd.ni_cnd, &va);
3352 if (error)
3353 goto out;
3354 #endif
3355
3356 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3357 goto out;
3358
3359 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
3360 goto out;
3361
3362 if (vp) {
3363 int update_flags = 0;
3364
3365 // Make sure the name & parent pointers are hooked up
3366 if (vp->v_name == NULL)
3367 update_flags |= VNODE_UPDATE_NAME;
3368 if (vp->v_parent == NULLVP)
3369 update_flags |= VNODE_UPDATE_PARENT;
3370
3371 if (update_flags)
3372 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3373
3374 #if CONFIG_FSE
3375 add_fsevent(FSE_CREATE_FILE, ctx,
3376 FSE_ARG_VNODE, vp,
3377 FSE_ARG_DONE);
3378 #endif
3379 }
3380
3381 out:
3382 /*
3383 * nameidone has to happen before we vnode_put(dvp)
3384 * since it may need to release the fs_nodelock on the dvp
3385 */
3386 nameidone(&nd);
3387
3388 if (vp)
3389 vnode_put(vp);
3390 vnode_put(dvp);
3391
3392 return (error);
3393 }
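/*
 * Userspace equivalent, as an illustrative sketch (requires superuser, as
 * enforced above; the device name and numbers are hypothetical):
 *
 *	#include <sys/types.h>
 *	#include <sys/stat.h>
 *
 *	mknod("/dev/example0", S_IFCHR | 0600, makedev(42, 0));
 */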
3394
3395 /*
3396 * Create a named pipe.
3397 *
3398 * Returns: 0 Success
3399 * EEXIST
3400 * namei:???
3401 * vnode_authorize:???
3402 * vn_create:???
3403 */
3404 static int
3405 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
3406 {
3407 vnode_t vp, dvp;
3408 int error;
3409 struct nameidata nd;
3410
3411 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
3412 UIO_USERSPACE, upath, ctx);
3413 error = namei(&nd);
3414 if (error)
3415 return (error);
3416 dvp = nd.ni_dvp;
3417 vp = nd.ni_vp;
3418
3419 /* check that this is a new file and authorize addition */
3420 if (vp != NULL) {
3421 error = EEXIST;
3422 goto out;
3423 }
3424 VATTR_SET(vap, va_type, VFIFO);
3425
3426 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
3427 goto out;
3428
3429 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
3430 out:
3431 /*
3432 * nameidone has to happen before we vnode_put(dvp)
3433 * since it may need to release the fs_nodelock on the dvp
3434 */
3435 nameidone(&nd);
3436
3437 if (vp)
3438 vnode_put(vp);
3439 vnode_put(dvp);
3440
3441 return error;
3442 }
3443
3444
3445 /*
3446 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3447 *
3448 * Parameters: p Process requesting the open
3449 * uap User argument descriptor (see below)
3450 * retval (Ignored)
3451 *
3452 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3453 * uap->uid UID to set
3454 * uap->gid GID to set
3455 * uap->mode File mode to set (same as 'mkfifo')
3456 * uap->xsecurity ACL to set, if creating
3457 *
3458 * Returns: 0 Success
3459 * !0 errno value
3460 *
3461 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3462 *
3463 * XXX: We should enumerate the possible errno values here, and where
3464 * in the code they originated.
3465 */
3466 int
3467 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3468 {
3469 int ciferror;
3470 kauth_filesec_t xsecdst;
3471 struct vnode_attr va;
3472
3473 AUDIT_ARG(owner, uap->uid, uap->gid);
3474
3475 xsecdst = KAUTH_FILESEC_NONE;
3476 if (uap->xsecurity != USER_ADDR_NULL) {
3477 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3478 return ciferror;
3479 }
3480
3481 VATTR_INIT(&va);
3482 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3483 if (uap->uid != KAUTH_UID_NONE)
3484 VATTR_SET(&va, va_uid, uap->uid);
3485 if (uap->gid != KAUTH_GID_NONE)
3486 VATTR_SET(&va, va_gid, uap->gid);
3487 if (xsecdst != KAUTH_FILESEC_NONE)
3488 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3489
3490 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3491
3492 if (xsecdst != KAUTH_FILESEC_NONE)
3493 kauth_filesec_free(xsecdst);
3494 return ciferror;
3495 }
3496
3497 /* ARGSUSED */
3498 int
3499 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3500 {
3501 struct vnode_attr va;
3502
3503 VATTR_INIT(&va);
3504 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3505
3506 return(mkfifo1(vfs_context_current(), uap->path, &va));
3507 }
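/*
 * Userspace equivalent, for illustration (the path is arbitrary):
 *
 *	mkfifo("/tmp/example.fifo", 0666);
 *
 * after which one process opens the fifo for reading and another for
 * writing.
 */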
3508
3509
3510 static char *
3511 my_strrchr(char *p, int ch)
3512 {
3513 char *save;
3514
3515 for (save = NULL;; ++p) {
3516 if (*p == ch)
3517 save = p;
3518 if (!*p)
3519 return(save);
3520 }
3521 /* NOTREACHED */
3522 }
3523
3524 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3525
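/*
 * Build the path to dvp (plus an optional leafname) into the caller's
 * buffer for fsevents/kauth notification purposes.  This never fails: if
 * the full path cannot be obtained or does not fit, *truncated_path is
 * set and the best available ancestor path (ultimately "/") is used
 * instead.  Returns the length of the resulting string, counting the
 * terminating NUL.
 */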
3526 int
3527 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
3528 {
3529 int ret, len = _len;
3530
3531 *truncated_path = 0;
3532 ret = vn_getpath(dvp, path, &len);
3533 if (ret == 0 && len < (MAXPATHLEN - 1)) {
3534 if (leafname) {
3535 path[len-1] = '/';
3536 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
3537 if (len > MAXPATHLEN) {
3538 char *ptr;
3539
3540 // the string got truncated!
3541 *truncated_path = 1;
3542 ptr = my_strrchr(path, '/');
3543 if (ptr) {
3544 *ptr = '\0'; // chop off the string at the last directory component
3545 }
3546 len = strlen(path) + 1;
3547 }
3548 }
3549 } else if (ret == 0) {
3550 *truncated_path = 1;
3551 } else if (ret != 0) {
3552 struct vnode *mydvp=dvp;
3553
3554 if (ret != ENOSPC) {
3555 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
3556 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
3557 }
3558 *truncated_path = 1;
3559
3560 do {
3561 if (mydvp->v_parent != NULL) {
3562 mydvp = mydvp->v_parent;
3563 } else if (mydvp->v_mount) {
3564 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
3565 break;
3566 } else {
3567 // no parent and no mount point? the only thing to do is punt and say "/" changed
3568 strlcpy(path, "/", _len);
3569 len = 2;
3570 mydvp = NULL;
3571 }
3572
3573 if (mydvp == NULL) {
3574 break;
3575 }
3576
3577 len = _len;
3578 ret = vn_getpath(mydvp, path, &len);
3579 } while (ret == ENOSPC);
3580 }
3581
3582 return len;
3583 }
3584
3585
3586 /*
3587 * Make a hard file link.
3588 *
3589 * Returns: 0 Success
3590 * EPERM
3591 * EEXIST
3592 * EXDEV
3593 * namei:???
3594 * vnode_authorize:???
3595 * VNOP_LINK:???
3596 */
3597 /* ARGSUSED */
3598 int
3599 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
3600 {
3601 vnode_t vp, dvp, lvp;
3602 struct nameidata nd;
3603 vfs_context_t ctx = vfs_context_current();
3604 int error;
3605 #if CONFIG_FSE
3606 fse_info finfo;
3607 #endif
3608 int need_event, has_listeners;
3609 char *target_path = NULL;
3610 int truncated=0;
3611
3612 vp = dvp = lvp = NULLVP;
3613
3614 /* look up the object we are linking to */
3615 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
3616 UIO_USERSPACE, uap->path, ctx);
3617 error = namei(&nd);
3618 if (error)
3619 return (error);
3620 vp = nd.ni_vp;
3621
3622 nameidone(&nd);
3623
3624 /*
3625 * Normally, linking to directories is not supported.
3626 * However, some file systems may have limited support.
3627 */
3628 if (vp->v_type == VDIR) {
3629 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
3630 error = EPERM; /* POSIX */
3631 goto out;
3632 }
3633 /* Linking to a directory requires ownership. */
3634 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
3635 struct vnode_attr dva;
3636
3637 VATTR_INIT(&dva);
3638 VATTR_WANTED(&dva, va_uid);
3639 if (vnode_getattr(vp, &dva, ctx) != 0 ||
3640 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
3641 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
3642 error = EACCES;
3643 goto out;
3644 }
3645 }
3646 }
3647
3648 /* lookup the target node */
3649 #if CONFIG_TRIGGERS
3650 nd.ni_op = OP_LINK;
3651 #endif
3652 nd.ni_cnd.cn_nameiop = CREATE;
3653 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
3654 nd.ni_dirp = uap->link;
3655 error = namei(&nd);
3656 if (error != 0)
3657 goto out;
3658 dvp = nd.ni_dvp;
3659 lvp = nd.ni_vp;
3660
3661 #if CONFIG_MACF
3662 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
3663 goto out2;
3664 #endif
3665
3666 /* nor may we link to anything that kauth doesn't want us to (e.g. immutable items) */
3667 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
3668 goto out2;
3669
3670 /* target node must not exist */
3671 if (lvp != NULLVP) {
3672 error = EEXIST;
3673 goto out2;
3674 }
3675 /* cannot link across mountpoints */
3676 if (vnode_mount(vp) != vnode_mount(dvp)) {
3677 error = EXDEV;
3678 goto out2;
3679 }
3680
3681 /* authorize creation of the target node */
3682 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3683 goto out2;
3684
3685 /* and finally make the link */
3686 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
3687 if (error)
3688 goto out2;
3689
3690 #if CONFIG_FSE
3691 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
3692 #else
3693 need_event = 0;
3694 #endif
3695 has_listeners = kauth_authorize_fileop_has_listeners();
3696
3697 if (need_event || has_listeners) {
3698 char *link_to_path = NULL;
3699 int len, link_name_len;
3700
3701 /* build the path to the new link file */
3702 GET_PATH(target_path);
3703 if (target_path == NULL) {
3704 error = ENOMEM;
3705 goto out2;
3706 }
3707
3708 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
3709
3710 if (has_listeners) {
3711 /* build the path to file we are linking to */
3712 GET_PATH(link_to_path);
3713 if (link_to_path == NULL) {
3714 error = ENOMEM;
3715 goto out2;
3716 }
3717
3718 link_name_len = MAXPATHLEN;
3719 vn_getpath(vp, link_to_path, &link_name_len);
3720
3721 /*
3722 * Call out to allow 3rd party notification of the link creation.
3723 * Ignore result of kauth_authorize_fileop call.
3724 */
3725 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
3726 (uintptr_t)link_to_path, (uintptr_t)target_path);
3727 if (link_to_path != NULL) {
3728 RELEASE_PATH(link_to_path);
3729 }
3730 }
3731 #if CONFIG_FSE
3732 if (need_event) {
3733 /* construct fsevent */
3734 if (get_fse_info(vp, &finfo, ctx) == 0) {
3735 if (truncated) {
3736 finfo.mode |= FSE_TRUNCATED_PATH;
3737 }
3738
3739 // build the path to the destination of the link
3740 add_fsevent(FSE_CREATE_FILE, ctx,
3741 FSE_ARG_STRING, len, target_path,
3742 FSE_ARG_FINFO, &finfo,
3743 FSE_ARG_DONE);
3744 }
3745 if (vp->v_parent) {
3746 add_fsevent(FSE_STAT_CHANGED, ctx,
3747 FSE_ARG_VNODE, vp->v_parent,
3748 FSE_ARG_DONE);
3749 }
3750 }
3751 #endif
3752 }
3753 out2:
3754 /*
3755 * nameidone has to happen before we vnode_put(dvp)
3756 * since it may need to release the fs_nodelock on the dvp
3757 */
3758 nameidone(&nd);
3759 if (target_path != NULL) {
3760 RELEASE_PATH(target_path);
3761 }
3762 out:
3763 if (lvp)
3764 vnode_put(lvp);
3765 if (dvp)
3766 vnode_put(dvp);
3767 vnode_put(vp);
3768 return (error);
3769 }
3770
3771 /*
3772 * Make a symbolic link.
3773 *
3774 * We could add support for ACLs here too...
3775 */
3776 /* ARGSUSED */
3777 int
3778 symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
3779 {
3780 struct vnode_attr va;
3781 char *path;
3782 int error;
3783 struct nameidata nd;
3784 vfs_context_t ctx = vfs_context_current();
3785 vnode_t vp, dvp;
3786 size_t dummy=0;
3787
3788 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
3789 error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
3790 if (error)
3791 goto out;
3792 AUDIT_ARG(text, path); /* This is the link string */
3793
3794 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
3795 UIO_USERSPACE, uap->link, ctx);
3796 error = namei(&nd);
3797 if (error)
3798 goto out;
3799 dvp = nd.ni_dvp;
3800 vp = nd.ni_vp;
3801
3802 VATTR_INIT(&va);
3803 VATTR_SET(&va, va_type, VLNK);
3804 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
3805 #if CONFIG_MACF
3806 error = mac_vnode_check_create(ctx,
3807 dvp, &nd.ni_cnd, &va);
3808 #endif
3809 if (error != 0) {
3810 goto skipit;
3811 }
3812
3813 if (vp != NULL) {
3814 error = EEXIST;
3815 goto skipit;
3816 }
3817
3818 /* authorize */
3819 if (error == 0)
3820 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
3821 /* get default ownership, etc. */
3822 if (error == 0)
3823 error = vnode_authattr_new(dvp, &va, 0, ctx);
3824 if (error == 0)
3825 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
3826
3827 /* do fallback attribute handling */
3828 if (error == 0)
3829 error = vnode_setattr_fallback(vp, &va, ctx);
3830
3831 if (error == 0) {
3832 int update_flags = 0;
3833
3834 if (vp == NULL) {
3835 nd.ni_cnd.cn_nameiop = LOOKUP;
3836 #if CONFIG_TRIGGERS
3837 nd.ni_op = OP_LOOKUP;
3838 #endif
3839 nd.ni_cnd.cn_flags = 0;
3840 error = namei(&nd);
3841 vp = nd.ni_vp;
3842
3843 if (vp == NULL)
3844 goto skipit;
3845 }
3846
3847 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
3848 /* call out to allow 3rd party notification of the symlink creation.
3849 * Ignore result of kauth_authorize_fileop call.
3850 */
3851 if (kauth_authorize_fileop_has_listeners() &&
3852 namei(&nd) == 0) {
3853 char *new_link_path = NULL;
3854 int len;
3855
3856 /* build the path to the new link file */
3857 new_link_path = get_pathbuff();
3858 len = MAXPATHLEN;
3859 vn_getpath(dvp, new_link_path, &len);
3860 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
3861 new_link_path[len - 1] = '/';
3862 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
3863 }
3864
3865 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
3866 (uintptr_t)path, (uintptr_t)new_link_path);
3867 if (new_link_path != NULL)
3868 release_pathbuff(new_link_path);
3869 }
3870 #endif
3871 // Make sure the name & parent pointers are hooked up
3872 if (vp->v_name == NULL)
3873 update_flags |= VNODE_UPDATE_NAME;
3874 if (vp->v_parent == NULLVP)
3875 update_flags |= VNODE_UPDATE_PARENT;
3876
3877 if (update_flags)
3878 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3879
3880 #if CONFIG_FSE
3881 add_fsevent(FSE_CREATE_FILE, ctx,
3882 FSE_ARG_VNODE, vp,
3883 FSE_ARG_DONE);
3884 #endif
3885 }
3886
3887 skipit:
3888 /*
3889 * nameidone has to happen before we vnode_put(dvp)
3890 * since it may need to release the fs_nodelock on the dvp
3891 */
3892 nameidone(&nd);
3893
3894 if (vp)
3895 vnode_put(vp);
3896 vnode_put(dvp);
3897 out:
3898 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
3899
3900 return (error);
3901 }
3902
3903 /*
3904 * Delete a whiteout from the filesystem.
3905 * XXX authorization not implemented for whiteouts
3906 */
3907 int
3908 undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
3909 {
3910 int error;
3911 struct nameidata nd;
3912 vfs_context_t ctx = vfs_context_current();
3913 vnode_t vp, dvp;
3914
3915 NDINIT(&nd, DELETE, OP_UNLINK, LOCKPARENT | DOWHITEOUT | AUDITVNPATH1,
3916 UIO_USERSPACE, uap->path, ctx);
3917 error = namei(&nd);
3918 if (error)
3919 return (error);
3920 dvp = nd.ni_dvp;
3921 vp = nd.ni_vp;
3922
3923 if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
3924 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
3925 } else
3926 error = EEXIST;
3927
3928 /*
3929 * nameidone has to happen before we vnode_put(dvp)
3930 * since it may need to release the fs_nodelock on the dvp
3931 */
3932 nameidone(&nd);
3933
3934 if (vp)
3935 vnode_put(vp);
3936 vnode_put(dvp);
3937
3938 return (error);
3939 }
3940
3941
3942 /*
3943 * Delete a name from the filesystem.
3944 */
3945 /* ARGSUSED */
3946 int
3947 unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags)
3948 {
3949 vnode_t vp, dvp;
3950 int error;
3951 struct componentname *cnp;
3952 char *path = NULL;
3953 int len=0;
3954 #if CONFIG_FSE
3955 fse_info finfo;
3956 struct vnode_attr va;
3957 #endif
3958 int flags = 0;
3959 int need_event = 0;
3960 int has_listeners = 0;
3961 int truncated_path=0;
3962 int batched;
3963 struct vnode_attr *vap = NULL;
3964
3965 #if NAMEDRSRCFORK
3966 /* unlink or delete is allowed on rsrc forks and named streams */
3967 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3968 #endif
3969
3970 ndp->ni_cnd.cn_flags |= LOCKPARENT;
3971 ndp->ni_flag |= NAMEI_COMPOUNDREMOVE;
3972 cnp = &ndp->ni_cnd;
3973
3974 lookup_continue:
3975 error = namei(ndp);
3976 if (error)
3977 return (error);
3978
3979 dvp = ndp->ni_dvp;
3980 vp = ndp->ni_vp;
3981
3982
3983 /* With Carbon delete semantics, busy files cannot be deleted */
3984 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
3985 flags |= VNODE_REMOVE_NODELETEBUSY;
3986 }
3987
3988 /* If we're told to, then skip any potential future upcalls */
3989 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
3990 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
3991 }
3992
3993
3994 if (vp) {
3995 batched = vnode_compound_remove_available(vp);
3996 /*
3997 * The root of a mounted filesystem cannot be deleted.
3998 */
3999 if (vp->v_flag & VROOT) {
4000 error = EBUSY;
4001 }
4002
4003 if (!batched) {
4004 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4005 if (error) {
4006 goto out;
4007 }
4008 }
4009 } else {
4010 batched = 1;
4011
4012 if (!vnode_compound_remove_available(dvp)) {
4013 panic("No vp, but no compound remove?");
4014 }
4015 }
4016
4017 #if CONFIG_FSE
4018 need_event = need_fsevent(FSE_DELETE, dvp);
4019 if (need_event) {
4020 if (!batched) {
4021 if ((vp->v_flag & VISHARDLINK) == 0) {
4022 /* XXX need to get these data in batched VNOP */
4023 get_fse_info(vp, &finfo, ctx);
4024 }
4025 } else {
4026 error = vfs_get_notify_attributes(&va);
4027 if (error) {
4028 goto out;
4029 }
4030
4031 vap = &va;
4032 }
4033 }
4034 #endif
4035 has_listeners = kauth_authorize_fileop_has_listeners();
4036 if (need_event || has_listeners) {
4037 if (path == NULL) {
4038 GET_PATH(path);
4039 if (path == NULL) {
4040 error = ENOMEM;
4041 goto out;
4042 }
4043 }
4044 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4045 }
4046
4047 #if NAMEDRSRCFORK
4048 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4049 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4050 else
4051 #endif
4052 {
4053 error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx);
4054 vp = ndp->ni_vp;
4055 if (error == EKEEPLOOKING) {
4056 if (!batched) {
4057 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4058 }
4059
4060 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
4061 panic("EKEEPLOOKING, but continue flag not set?");
4062 }
4063
4064 if (vnode_isdir(vp)) {
4065 error = EISDIR;
4066 goto out;
4067 }
4068 goto lookup_continue;
4069 }
4070 }
4071
4072 /*
4073 * Call out to allow 3rd party notification of delete.
4074 * Ignore result of kauth_authorize_fileop call.
4075 */
4076 if (!error) {
4077 if (has_listeners) {
4078 kauth_authorize_fileop(vfs_context_ucred(ctx),
4079 KAUTH_FILEOP_DELETE,
4080 (uintptr_t)vp,
4081 (uintptr_t)path);
4082 }
4083
4084 if (vp->v_flag & VISHARDLINK) {
4085 //
4086 // if a hardlink gets deleted we want to blow away the
4087 // v_parent link because the path that got us to this
4088 // instance of the link is no longer valid. this will
4089 // force the next call to get the path to ask the file
4090 // system instead of just following the v_parent link.
4091 //
4092 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4093 }
4094
4095 #if CONFIG_FSE
4096 if (need_event) {
4097 if (vp->v_flag & VISHARDLINK) {
4098 get_fse_info(vp, &finfo, ctx);
4099 } else if (vap) {
4100 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4101 }
4102 if (truncated_path) {
4103 finfo.mode |= FSE_TRUNCATED_PATH;
4104 }
4105 add_fsevent(FSE_DELETE, ctx,
4106 FSE_ARG_STRING, len, path,
4107 FSE_ARG_FINFO, &finfo,
4108 FSE_ARG_DONE);
4109 }
4110 #endif
4111 }
4112
4113 out:
4114 if (path != NULL)
4115 RELEASE_PATH(path);
4116
4117 #if NAMEDRSRCFORK
4118 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4119 * will cause its shadow file to go away if necessary.
4120 */
4121 if (vp && (vnode_isnamedstream(vp)) &&
4122 (vp->v_parent != NULLVP) &&
4123 vnode_isshadow(vp)) {
4124 vnode_recycle(vp);
4125 }
4126 #endif
4127 /*
4128 * nameidone has to happen before we vnode_put(dvp)
4129 * since it may need to release the fs_nodelock on the dvp
4130 */
4131 nameidone(ndp);
4132 vnode_put(dvp);
4133 if (vp) {
4134 vnode_put(vp);
4135 }
4136 return (error);
4137 }
4138
4139 /*
4140 * Delete a name from the filesystem using POSIX semantics.
4141 */
4142 int
4143 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4144 {
4145 struct nameidata nd;
4146 vfs_context_t ctx = vfs_context_current();
4147
4148 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4149 uap->path, ctx);
4150 return unlink1(ctx, &nd, 0);
4151 }
4152
4153 /*
4154 * Delete a name from the filesystem using Carbon semantics.
4155 */
4156 int
4157 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4158 {
4159 struct nameidata nd;
4160 vfs_context_t ctx = vfs_context_current();
4161
4162 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4163 uap->path, ctx);
4164 return unlink1(ctx, &nd, VNODE_REMOVE_NODELETEBUSY);
4165 }
4166
4167 /*
4168 * Reposition read/write file offset.
4169 */
4170 int
4171 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
4172 {
4173 struct fileproc *fp;
4174 vnode_t vp;
4175 struct vfs_context *ctx;
4176 off_t offset = uap->offset, file_size;
4177 int error;
4178
4179 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4180 if (error == ENOTSUP)
4181 return (ESPIPE);
4182 return (error);
4183 }
4184 if (vnode_isfifo(vp)) {
4185 file_drop(uap->fd);
4186 return(ESPIPE);
4187 }
4188
4189
4190 ctx = vfs_context_current();
4191 #if CONFIG_MACF
4192 if (uap->whence == L_INCR && uap->offset == 0)
4193 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4194 fp->f_fglob);
4195 else
4196 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4197 fp->f_fglob);
4198 if (error) {
4199 file_drop(uap->fd);
4200 return (error);
4201 }
4202 #endif
4203 if ( (error = vnode_getwithref(vp)) ) {
4204 file_drop(uap->fd);
4205 return(error);
4206 }
4207
4208 switch (uap->whence) {
4209 case L_INCR:
4210 offset += fp->f_fglob->fg_offset;
4211 break;
4212 case L_XTND:
4213 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
4214 break;
4215 offset += file_size;
4216 break;
4217 case L_SET:
4218 break;
4219 default:
4220 error = EINVAL;
4221 }
4222 if (error == 0) {
4223 if (uap->offset > 0 && offset < 0) {
4224 /* Incremented/relative move past max size */
4225 error = EOVERFLOW;
4226 } else {
4227 /*
4228 * Allow negative offsets on character devices, per
4229 * POSIX 1003.1-2001. Most likely for writing disk
4230 * labels.
4231 */
4232 if (offset < 0 && vp->v_type != VCHR) {
4233 /* Decremented/relative move before start */
4234 error = EINVAL;
4235 } else {
4236 /* Success */
4237 fp->f_fglob->fg_offset = offset;
4238 *retval = fp->f_fglob->fg_offset;
4239 }
4240 }
4241 }
4242
4243 /*
4244 * An lseek can affect whether data is "available to read." Use
4245 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4246 */
4247 post_event_if_success(vp, error, NOTE_NONE);
4248 (void)vnode_put(vp);
4249 file_drop(uap->fd);
4250 return (error);
4251 }
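/*
 * Illustrative user-space sketch of the whence handling and overflow check
 * above; it is not part of this file and is only a reading aid.  On a
 * non-empty file, adding a huge positive offset to the file size wraps the
 * signed 64-bit result negative, which the code above reports as EOVERFLOW;
 * a plain SEEK_END query simply returns the file size.
 */
#if 0	/* example only, never compiled */
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static void
show_lseek(const char *path)
{
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return;

	/* offset 0 relative to the end yields the current file size */
	printf("size = %lld\n", (long long)lseek(fd, 0, SEEK_END));

	/* offset + file size wraps negative: rejected with EOVERFLOW */
	if (lseek(fd, INT64_MAX, SEEK_END) == -1 && errno == EOVERFLOW)
		printf("seek past the maximum file offset rejected\n");

	close(fd);
}
#endif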
4252
4253
4254 /*
4255 * Check access permissions.
4256 *
4257 * Returns: 0 Success
4258 * vnode_authorize:???
4259 */
4260 static int
4261 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4262 {
4263 kauth_action_t action;
4264 int error;
4265
4266 /*
4267 * If just the regular access bits, convert them to something
4268 * that vnode_authorize will understand.
4269 */
4270 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4271 action = 0;
4272 if (uflags & R_OK)
4273 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4274 if (uflags & W_OK) {
4275 if (vnode_isdir(vp)) {
4276 action |= KAUTH_VNODE_ADD_FILE |
4277 KAUTH_VNODE_ADD_SUBDIRECTORY;
4278 /* might want delete rights here too */
4279 } else {
4280 action |= KAUTH_VNODE_WRITE_DATA;
4281 }
4282 }
4283 if (uflags & X_OK) {
4284 if (vnode_isdir(vp)) {
4285 action |= KAUTH_VNODE_SEARCH;
4286 } else {
4287 action |= KAUTH_VNODE_EXECUTE;
4288 }
4289 }
4290 } else {
4291 /* take advantage of definition of uflags */
4292 action = uflags >> 8;
4293 }
4294
4295 #if CONFIG_MACF
4296 error = mac_vnode_check_access(ctx, vp, uflags);
4297 if (error)
4298 return (error);
4299 #endif /* MAC */
4300
4301 /* action == 0 means only check for existence */
4302 if (action != 0) {
4303 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4304 } else {
4305 error = 0;
4306 }
4307
4308 return(error);
4309 }
4310
4311
4312
4313 /*
4314 * access_extended: Check access permissions in bulk.
4315 *
4316 * Description: uap->entries Pointer to an array of accessx
4317 * descriptor structs, plus one or
4318 * more NULL terminated strings (see
4319 * "Notes" section below).
4320 * uap->size Size of the area pointed to by
4321 * uap->entries.
4322 * uap->results Pointer to the results array.
4323 *
4324 * Returns: 0 Success
4325 * ENOMEM Insufficient memory
4326 * EINVAL Invalid arguments
4327 * namei:EFAULT Bad address
4328 * namei:ENAMETOOLONG Filename too long
4329 * namei:ENOENT No such file or directory
4330 * namei:ELOOP Too many levels of symbolic links
4331 * namei:EBADF Bad file descriptor
4332 * namei:ENOTDIR Not a directory
4333 * namei:???
4334 * access1:
4335 *
4336 * Implicit returns:
4337 * uap->results Array contents modified
4338 *
4339 * Notes: The uap->entries are structured as an arbitrary length array
4340 * of accessx descriptors, followed by one or more NULL terminated
4341 * strings
4342 *
4343 * struct accessx_descriptor[0]
4344 * ...
4345 * struct accessx_descriptor[n]
4346 * char name_data[0];
4347 *
4348 * We determine the entry count by walking the buffer containing
4349 * the uap->entries argument descriptor. For each descriptor we
4350 * see, the valid values for the offset ad_name_offset will be
4351 * in the byte range:
4352 *
4353 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4354 * to
4355 * [ uap->entries + uap->size - 2 ]
4356 *
4357 * since we must have at least one string, and the string must
4358 * be at least one character plus the NULL terminator in length.
4359 *
4360 * XXX: Need to support the check-as uid argument
4361 */
4362 int
4363 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
4364 {
4365 struct accessx_descriptor *input = NULL;
4366 errno_t *result = NULL;
4367 errno_t error = 0;
4368 int wantdelete = 0;
4369 unsigned int desc_max, desc_actual, i, j;
4370 struct vfs_context context;
4371 struct nameidata nd;
4372 int niopts;
4373 vnode_t vp = NULL;
4374 vnode_t dvp = NULL;
4375 #define ACCESSX_MAX_DESCR_ON_STACK 10
4376 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
4377
4378 context.vc_ucred = NULL;
4379
4380 /*
4381 * Validate parameters; if valid, copy the descriptor array and string
4382 * arguments into local memory. Before proceeding, the following
4383 * conditions must have been met:
4384 *
4385 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4386 * o There must be sufficient room in the request for at least one
4387 * descriptor and a one byte NUL terminated string.
4388 * o The allocation of local storage must not fail.
4389 */
4390 if (uap->size > ACCESSX_MAX_TABLESIZE)
4391 return(ENOMEM);
4392 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
4393 return(EINVAL);
4394 if (uap->size <= sizeof (stack_input)) {
4395 input = stack_input;
4396 } else {
4397 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
4398 if (input == NULL) {
4399 error = ENOMEM;
4400 goto out;
4401 }
4402 }
4403 error = copyin(uap->entries, input, uap->size);
4404 if (error)
4405 goto out;
4406
4407 AUDIT_ARG(opaque, input, uap->size);
4408
4409 /*
4410 * Force NUL termination of the copyin buffer to avoid namei() running
4411 * off the end. If the caller passes us bogus data, they may get a
4412 * bogus result.
4413 */
4414 ((char *)input)[uap->size - 1] = 0;
4415
4416 /*
4417 * Access is defined as checking against the process' real identity,
4418 * even if operations are checking the effective identity. This
4419 * requires that we use a local vfs context.
4420 */
4421 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4422 context.vc_thread = current_thread();
4423
4424 /*
4425 * Find out how many entries we have, so we can allocate the result
4426 * array by walking the list and adjusting the count downward by the
4427 * earliest string offset we see.
4428 */
4429 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
4430 desc_actual = desc_max;
4431 for (i = 0; i < desc_actual; i++) {
4432 /*
4433 * Take the offset to the name string for this entry and
4434 * convert to an input array index, which would be one off
4435 * the end of the array if this entry was the lowest-addressed
4436 * name string.
4437 */
4438 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
4439
4440 /*
4441 * An offset greater than the max allowable offset is an error.
4442 * It is also an error for any valid entry to point
4443 * to a location prior to the end of the current entry, if
4444 * it's not a reference to the string of the previous entry.
4445 */
4446 if (j > desc_max || (j != 0 && j <= i)) {
4447 error = EINVAL;
4448 goto out;
4449 }
4450
4451 /*
4452 * An offset of 0 means use the previous descriptor's offset;
4453 * this is used to chain multiple requests for the same file
4454 * to avoid multiple lookups.
4455 */
4456 if (j == 0) {
4457 /* This is not valid for the first entry */
4458 if (i == 0) {
4459 error = EINVAL;
4460 goto out;
4461 }
4462 continue;
4463 }
4464
4465 /*
4466 * If the offset of the string for this descriptor is before
4467 * what we believe is the current actual last descriptor,
4468 * then we need to adjust our estimate downward; this permits
4469 * the string table following the last descriptor to be out
4470 * of order relative to the descriptor list.
4471 */
4472 if (j < desc_actual)
4473 desc_actual = j;
4474 }
4475
4476 /*
4477 * We limit the actual number of descriptors we are willing to process
4478 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
4479 * requested exceeds this limit, the request is rejected with ENOMEM.
4480 */
4481 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
4482 error = ENOMEM;
4483 goto out;
4484 }
4485 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
4486 if (result == NULL) {
4487 error = ENOMEM;
4488 goto out;
4489 }
4490
4491 /*
4492 * Do the work by iterating over the descriptor entries we know to
4493 * at least appear to contain valid data.
4494 */
4495 error = 0;
4496 for (i = 0; i < desc_actual; i++) {
4497 /*
4498 * If the ad_name_offset is 0, then we use the previous
4499 * results to make the check; otherwise, we are looking up
4500 * a new file name.
4501 */
4502 if (input[i].ad_name_offset != 0) {
4503 /* discard old vnodes */
4504 if (vp) {
4505 vnode_put(vp);
4506 vp = NULL;
4507 }
4508 if (dvp) {
4509 vnode_put(dvp);
4510 dvp = NULL;
4511 }
4512
4513 /*
4514 * Scan forward in the descriptor list to see if we
4515 * need the parent vnode. We will need it if we are
4516 * deleting, since we must have rights to remove
4517 * entries in the parent directory, as well as the
4518 * rights to delete the object itself.
4519 */
4520 wantdelete = input[i].ad_flags & _DELETE_OK;
4521 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
4522 if (input[j].ad_flags & _DELETE_OK)
4523 wantdelete = 1;
4524
4525 niopts = FOLLOW | AUDITVNPATH1;
4526
4527 /* need parent for vnode_authorize for deletion test */
4528 if (wantdelete)
4529 niopts |= WANTPARENT;
4530
4531 /* do the lookup */
4532 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
4533 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
4534 &context);
4535 error = namei(&nd);
4536 if (!error) {
4537 vp = nd.ni_vp;
4538 if (wantdelete)
4539 dvp = nd.ni_dvp;
4540 }
4541 nameidone(&nd);
4542 }
4543
4544 /*
4545 * Handle lookup errors.
4546 */
4547 switch(error) {
4548 case ENOENT:
4549 case EACCES:
4550 case EPERM:
4551 case ENOTDIR:
4552 result[i] = error;
4553 break;
4554 case 0:
4555 /* run this access check */
4556 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
4557 break;
4558 default:
4559 /* fatal lookup error */
4560
4561 goto out;
4562 }
4563 }
4564
4565 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
4566
4567 /* copy out results */
4568 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
4569
4570 out:
4571 if (input && input != stack_input)
4572 FREE(input, M_TEMP);
4573 if (result)
4574 FREE(result, M_TEMP);
4575 if (vp)
4576 vnode_put(vp);
4577 if (dvp)
4578 vnode_put(dvp);
4579 if (IS_VALID_CRED(context.vc_ucred))
4580 kauth_cred_unref(&context.vc_ucred);
4581 return(error);
4582 }
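/*
 * Illustrative user-space sketch of the descriptor/string-table layout that
 * access_extended() parses above; it is not part of this file.  To the best
 * of my knowledge the libc wrapper is accessx_np() and the descriptor type
 * comes from <sys/unistd.h>; the trailing "check-as" uid argument is not
 * supported by this code (see the XXX above), so passing -1 for it is an
 * assumption.  Descriptor 1 chains to descriptor 0's path by using an
 * ad_name_offset of 0.
 */
#if 0	/* example only, never compiled */
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static void
bulk_access_example(void)
{
	struct {
		struct accessx_descriptor ad[2];	/* descriptor array... */
		char names[32];				/* ...then the string table */
	} req;
	int results[2];
	size_t size;

	memset(&req, 0, sizeof(req));

	/* descriptor 0: may the real uid read /etc/hosts? */
	req.ad[0].ad_name_offset = (unsigned int)sizeof(req.ad);
	req.ad[0].ad_flags = R_OK;

	/* descriptor 1: same path, different rights */
	req.ad[1].ad_name_offset = 0;
	req.ad[1].ad_flags = W_OK;

	strlcpy(req.names, "/etc/hosts", sizeof(req.names));
	size = sizeof(req.ad) + strlen(req.names) + 1;

	/* each result is an errno value, 0 meaning the access is allowed */
	if (accessx_np(&req.ad[0], size, results, (uid_t)-1) == 0)
		printf("read: %d  write: %d\n", results[0], results[1]);
}
#endif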
4583
4584
4585 /*
4586 * Returns: 0 Success
4587 * namei:EFAULT Bad address
4588 * namei:ENAMETOOLONG Filename too long
4589 * namei:ENOENT No such file or directory
4590 * namei:ELOOP Too many levels of symbolic links
4591 * namei:EBADF Bad file descriptor
4592 * namei:ENOTDIR Not a directory
4593 * namei:???
4594 * access1:
4595 */
4596 int
4597 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
4598 {
4599 int error;
4600 struct nameidata nd;
4601 int niopts;
4602 struct vfs_context context;
4603 #if NAMEDRSRCFORK
4604 int is_namedstream = 0;
4605 #endif
4606
4607 /*
4608 * Access is defined as checking against the process'
4609 * real identity, even if operations are checking the
4610 * effective identity. So we need to tweak the credential
4611 * in the context.
4612 */
4613 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4614 context.vc_thread = current_thread();
4615
4616 niopts = FOLLOW | AUDITVNPATH1;
4617 /* need parent for vnode_authorize for deletion test */
4618 if (uap->flags & _DELETE_OK)
4619 niopts |= WANTPARENT;
4620 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_USERSPACE,
4621 uap->path, &context);
4622
4623 #if NAMEDRSRCFORK
4624 /* access(F_OK) calls are allowed for resource forks. */
4625 if (uap->flags == F_OK)
4626 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4627 #endif
4628 error = namei(&nd);
4629 if (error)
4630 goto out;
4631
4632 #if NAMEDRSRCFORK
4633 /* Grab reference on the shadow stream file vnode to
4634 * force an inactive on release which will mark it
4635 * for recycle.
4636 */
4637 if (vnode_isnamedstream(nd.ni_vp) &&
4638 (nd.ni_vp->v_parent != NULLVP) &&
4639 vnode_isshadow(nd.ni_vp)) {
4640 is_namedstream = 1;
4641 vnode_ref(nd.ni_vp);
4642 }
4643 #endif
4644
4645 error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
4646
4647 #if NAMEDRSRCFORK
4648 if (is_namedstream) {
4649 vnode_rele(nd.ni_vp);
4650 }
4651 #endif
4652
4653 vnode_put(nd.ni_vp);
4654 if (uap->flags & _DELETE_OK)
4655 vnode_put(nd.ni_dvp);
4656 nameidone(&nd);
4657
4658 out:
4659 kauth_cred_unref(&context.vc_ucred);
4660 return(error);
4661 }
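/*
 * Minimal user-space sketch, not part of this file: as the comment above
 * says, access() answers for the real uid/gid rather than the effective
 * ones, so inside a set-uid binary it reports what the invoking user could
 * do directly.
 */
#if 0	/* example only, never compiled */
#include <unistd.h>
#include <stdio.h>

static void
check_real_uid(const char *path)
{
	if (access(path, R_OK | W_OK) == 0)
		printf("real uid %d may read and write %s\n", (int)getuid(), path);
	else
		perror("access");
}
#endif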
4662
4663
4664 /*
4665 * Returns: 0 Success
4666 * EFAULT
4667 * copyout:EFAULT
4668 * namei:???
4669 * vn_stat:???
4670 */
4671 static int
4672 stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4673 {
4674 union {
4675 struct stat sb;
4676 struct stat64 sb64;
4677 } source;
4678 union {
4679 struct user64_stat user64_sb;
4680 struct user32_stat user32_sb;
4681 struct user64_stat64 user64_sb64;
4682 struct user32_stat64 user32_sb64;
4683 } dest;
4684 caddr_t sbp;
4685 int error, my_size;
4686 kauth_filesec_t fsec;
4687 size_t xsecurity_bufsize;
4688 void * statptr;
4689
4690 #if NAMEDRSRCFORK
4691 int is_namedstream = 0;
4692 /* stat calls are allowed for resource forks. */
4693 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4694 #endif
4695 error = namei(ndp);
4696 if (error)
4697 return (error);
4698 fsec = KAUTH_FILESEC_NONE;
4699
4700 statptr = (void *)&source;
4701
4702 #if NAMEDRSRCFORK
4703 /* Grab reference on the shadow stream file vnode to
4704 * force an inactive on release which will mark it
4705 * for recycle.
4706 */
4707 if (vnode_isnamedstream(ndp->ni_vp) &&
4708 (ndp->ni_vp->v_parent != NULLVP) &&
4709 vnode_isshadow(ndp->ni_vp)) {
4710 is_namedstream = 1;
4711 vnode_ref(ndp->ni_vp);
4712 }
4713 #endif
4714
4715 error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
4716
4717 #if NAMEDRSRCFORK
4718 if (is_namedstream) {
4719 vnode_rele(ndp->ni_vp);
4720 }
4721 #endif
4722 vnode_put(ndp->ni_vp);
4723 nameidone(ndp);
4724
4725 if (error)
4726 return (error);
4727 /* Zap spare fields */
4728 if (isstat64 != 0) {
4729 source.sb64.st_lspare = 0;
4730 source.sb64.st_qspare[0] = 0LL;
4731 source.sb64.st_qspare[1] = 0LL;
4732 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4733 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
4734 my_size = sizeof(dest.user64_sb64);
4735 sbp = (caddr_t)&dest.user64_sb64;
4736 } else {
4737 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
4738 my_size = sizeof(dest.user32_sb64);
4739 sbp = (caddr_t)&dest.user32_sb64;
4740 }
4741 /*
4742 * Check if we raced (post lookup) against the last unlink of a file.
4743 */
4744 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
4745 source.sb64.st_nlink = 1;
4746 }
4747 } else {
4748 source.sb.st_lspare = 0;
4749 source.sb.st_qspare[0] = 0LL;
4750 source.sb.st_qspare[1] = 0LL;
4751 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4752 munge_user64_stat(&source.sb, &dest.user64_sb);
4753 my_size = sizeof(dest.user64_sb);
4754 sbp = (caddr_t)&dest.user64_sb;
4755 } else {
4756 munge_user32_stat(&source.sb, &dest.user32_sb);
4757 my_size = sizeof(dest.user32_sb);
4758 sbp = (caddr_t)&dest.user32_sb;
4759 }
4760
4761 /*
4762 * Check if we raced (post lookup) against the last unlink of a file.
4763 */
4764 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
4765 source.sb.st_nlink = 1;
4766 }
4767 }
4768 if ((error = copyout(sbp, ub, my_size)) != 0)
4769 goto out;
4770
4771 /* caller wants extended security information? */
4772 if (xsecurity != USER_ADDR_NULL) {
4773
4774 /* did we get any? */
4775 if (fsec == KAUTH_FILESEC_NONE) {
4776 if (susize(xsecurity_size, 0) != 0) {
4777 error = EFAULT;
4778 goto out;
4779 }
4780 } else {
4781 /* find the user buffer size */
4782 xsecurity_bufsize = fusize(xsecurity_size);
4783
4784 /* copy out the actual data size */
4785 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
4786 error = EFAULT;
4787 goto out;
4788 }
4789
4790 /* if the caller supplied enough room, copy out to it */
4791 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
4792 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
4793 }
4794 }
4795 out:
4796 if (fsec != KAUTH_FILESEC_NONE)
4797 kauth_filesec_free(fsec);
4798 return (error);
4799 }
4800
4801 /*
4802 * Get file status; this version follows links.
4803 *
4804 * Returns: 0 Success
4805 * stat2:??? [see stat2() in this file]
4806 */
4807 static int
4808 stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4809 {
4810 struct nameidata nd;
4811 vfs_context_t ctx = vfs_context_current();
4812
4813 NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | FOLLOW | AUDITVNPATH1,
4814 UIO_USERSPACE, path, ctx);
4815 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4816 }
4817
4818 /*
4819 * stat_extended: Get file status; with extended security (ACL).
4820 *
4821 * Parameters: p (ignored)
4822 * uap User argument descriptor (see below)
4823 * retval (ignored)
4824 *
4825 * Indirect: uap->path Path of file to get status from
4826 * uap->ub User buffer (holds file status info)
4827 * uap->xsecurity ACL to get (extended security)
4828 * uap->xsecurity_size Size of ACL
4829 *
4830 * Returns: 0 Success
4831 * !0 errno value
4832 *
4833 */
4834 int
4835 stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval)
4836 {
4837 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4838 }
4839
4840 /*
4841 * Returns: 0 Success
4842 * stat1:??? [see stat1() in this file]
4843 */
4844 int
4845 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
4846 {
4847 return(stat1(uap->path, uap->ub, 0, 0, 0));
4848 }
4849
4850 int
4851 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
4852 {
4853 return(stat1(uap->path, uap->ub, 0, 0, 1));
4854 }
4855
4856 /*
4857 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
4858 *
4859 * Parameters: p (ignored)
4860 * uap User argument descriptor (see below)
4861 * retval (ignored)
4862 *
4863 * Indirect: uap->path Path of file to get status from
4864 * uap->ub User buffer (holds file status info)
4865 * uap->xsecurity ACL to get (extended security)
4866 * uap->xsecurity_size Size of ACL
4867 *
4868 * Returns: 0 Success
4869 * !0 errno value
4870 *
4871 */
4872 int
4873 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
4874 {
4875 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4876 }
4877 /*
4878 * Get file status; this version does not follow links.
4879 */
4880 static int
4881 lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4882 {
4883 struct nameidata nd;
4884 vfs_context_t ctx = vfs_context_current();
4885
4886 NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
4887 UIO_USERSPACE, path, ctx);
4888
4889 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4890 }
4891
4892 /*
4893 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
4894 *
4895 * Parameters: p (ignored)
4896 * uap User argument descriptor (see below)
4897 * retval (ignored)
4898 *
4899 * Indirect: uap->path Path of file to get status from
4900 * uap->ub User buffer (holds file status info)
4901 * uap->xsecurity ACL to get (extended security)
4902 * uap->xsecurity_size Size of ACL
4903 *
4904 * Returns: 0 Success
4905 * !0 errno value
4906 *
4907 */
4908 int
4909 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
4910 {
4911 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4912 }
4913
4914 int
4915 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
4916 {
4917 return(lstat1(uap->path, uap->ub, 0, 0, 0));
4918 }
4919
4920 int
4921 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
4922 {
4923 return(lstat1(uap->path, uap->ub, 0, 0, 1));
4924 }
4925
4926 /*
4927 * lstat64_extended: Get file status; can handle large inode numbers; does not
4928 * follow links; with extended security (ACL).
4929 *
4930 * Parameters: p (ignored)
4931 * uap User argument descriptor (see below)
4932 * retval (ignored)
4933 *
4934 * Indirect: uap->path Path of file to get status from
4935 * uap->ub User buffer (holds file status info)
4936 * uap->xsecurity ACL to get (extended security)
4937 * uap->xsecurity_size Size of ACL
4938 *
4939 * Returns: 0 Success
4940 * !0 errno value
4941 *
4942 */
4943 int
4944 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
4945 {
4946 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4947 }
4948
4949 /*
4950 * Get configurable pathname variables.
4951 *
4952 * Returns: 0 Success
4953 * namei:???
4954 * vn_pathconf:???
4955 *
4956 * Notes: Global implementation constants are intended to be
4957 * implemented in this function directly; all other constants
4958 * are per-FS implementation, and therefore must be handled in
4959 * each respective FS, instead.
4960 *
4961 * XXX We implement some things globally right now that should actually be
4962 * XXX per-FS; we will need to deal with this at some point.
4963 */
4964 /* ARGSUSED */
4965 int
4966 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
4967 {
4968 int error;
4969 struct nameidata nd;
4970 vfs_context_t ctx = vfs_context_current();
4971
4972 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
4973 UIO_USERSPACE, uap->path, ctx);
4974 error = namei(&nd);
4975 if (error)
4976 return (error);
4977
4978 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
4979
4980 vnode_put(nd.ni_vp);
4981 nameidone(&nd);
4982 return (error);
4983 }
4984
4985 /*
4986 * Return target name of a symbolic link.
4987 */
4988 /* ARGSUSED */
4989 int
4990 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
4991 {
4992 vnode_t vp;
4993 uio_t auio;
4994 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
4995 int error;
4996 struct nameidata nd;
4997 vfs_context_t ctx = vfs_context_current();
4998 char uio_buf[ UIO_SIZEOF(1) ];
4999
5000 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5001 UIO_USERSPACE, uap->path, ctx);
5002 error = namei(&nd);
5003 if (error)
5004 return (error);
5005 vp = nd.ni_vp;
5006
5007 nameidone(&nd);
5008
5009 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
5010 &uio_buf[0], sizeof(uio_buf));
5011 uio_addiov(auio, uap->buf, uap->count);
5012 if (vp->v_type != VLNK)
5013 error = EINVAL;
5014 else {
5015 #if CONFIG_MACF
5016 error = mac_vnode_check_readlink(ctx,
5017 vp);
5018 #endif
5019 if (error == 0)
5020 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
5021 if (error == 0)
5022 error = VNOP_READLINK(vp, auio, ctx);
5023 }
5024 vnode_put(vp);
5025
5026 /* Safe: uio_resid() is bounded above by "count", and "count" is an int */
5027 *retval = uap->count - (int)uio_resid(auio);
5028 return (error);
5029 }
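/*
 * User-space sketch, not part of this file: readlink() does not NUL
 * terminate, and the return value is the byte count computed above from the
 * residual uio length, so the caller terminates the buffer itself.
 */
#if 0	/* example only, never compiled */
#include <unistd.h>
#include <stdio.h>

static void
print_link_target(const char *path)
{
	char buf[1024];
	ssize_t n = readlink(path, buf, sizeof(buf) - 1);

	if (n >= 0) {
		buf[n] = '\0';
		printf("%s -> %s\n", path, buf);
	} else {
		perror("readlink");	/* EINVAL if path is not a symlink */
	}
}
#endif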
5030
5031 /*
5032 * Change file flags.
5033 */
5034 static int
5035 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5036 {
5037 struct vnode_attr va;
5038 kauth_action_t action;
5039 int error;
5040
5041 VATTR_INIT(&va);
5042 VATTR_SET(&va, va_flags, flags);
5043
5044 #if CONFIG_MACF
5045 error = mac_vnode_check_setflags(ctx, vp, flags);
5046 if (error)
5047 goto out;
5048 #endif
5049
5050 /* request authorisation, disregard immutability */
5051 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5052 goto out;
5053 /*
5054 * Request that the auth layer disregard those file flags it's allowed to when
5055 * authorizing this operation; we need to do this in order to be able to
5056 * clear immutable flags.
5057 */
5058 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5059 goto out;
5060 error = vnode_setattr(vp, &va, ctx);
5061
5062 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5063 error = ENOTSUP;
5064 }
5065 out:
5066 vnode_put(vp);
5067 return(error);
5068 }
5069
5070 /*
5071 * Change flags of a file given a path name.
5072 */
5073 /* ARGSUSED */
5074 int
5075 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5076 {
5077 vnode_t vp;
5078 vfs_context_t ctx = vfs_context_current();
5079 int error;
5080 struct nameidata nd;
5081
5082 AUDIT_ARG(fflags, uap->flags);
5083 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5084 UIO_USERSPACE, uap->path, ctx);
5085 error = namei(&nd);
5086 if (error)
5087 return (error);
5088 vp = nd.ni_vp;
5089 nameidone(&nd);
5090
5091 error = chflags1(vp, uap->flags, ctx);
5092
5093 return(error);
5094 }
5095
5096 /*
5097 * Change flags of a file given a file descriptor.
5098 */
5099 /* ARGSUSED */
5100 int
5101 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5102 {
5103 vnode_t vp;
5104 int error;
5105
5106 AUDIT_ARG(fd, uap->fd);
5107 AUDIT_ARG(fflags, uap->flags);
5108 if ( (error = file_vnode(uap->fd, &vp)) )
5109 return (error);
5110
5111 if ((error = vnode_getwithref(vp))) {
5112 file_drop(uap->fd);
5113 return(error);
5114 }
5115
5116 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5117
5118 error = chflags1(vp, uap->flags, vfs_context_current());
5119
5120 file_drop(uap->fd);
5121 return (error);
5122 }
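/*
 * User-space sketch, not part of this file: setting and clearing the user
 * immutable flag.  Clearing only works because chflags1() above asks the
 * auth layer to disregard immutability (KAUTH_VNODE_NOIMMUTABLE); note that
 * filesystems without flag support return ENOTSUP.
 */
#if 0	/* example only, never compiled */
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>

static int
toggle_uchg(const char *path)
{
	if (chflags(path, UF_IMMUTABLE) != 0) {
		perror("chflags set");
		return -1;
	}
	if (chflags(path, 0) != 0) {		/* clear the flags again */
		perror("chflags clear");
		return -1;
	}
	return 0;
}
#endif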
5123
5124 /*
5125 * Change security information on a filesystem object.
5126 *
5127 * Returns: 0 Success
5128 * EPERM Operation not permitted
5129 * vnode_authattr:??? [anything vnode_authattr can return]
5130 * vnode_authorize:??? [anything vnode_authorize can return]
5131 * vnode_setattr:??? [anything vnode_setattr can return]
5132 *
5133 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5134 * translated to EPERM before being returned.
5135 */
5136 static int
5137 chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5138 {
5139 kauth_action_t action;
5140 int error;
5141
5142 AUDIT_ARG(mode, vap->va_mode);
5143 /* XXX audit new args */
5144
5145 #if NAMEDSTREAMS
5146 /* chmod calls are not allowed for resource forks. */
5147 if (vp->v_flag & VISNAMEDSTREAM) {
5148 return (EPERM);
5149 }
5150 #endif
5151
5152 #if CONFIG_MACF
5153 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5154 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
5155 return (error);
5156 #endif
5157
5158 /* make sure that the caller is allowed to set this security information */
5159 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5160 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5161 if (error == EACCES)
5162 error = EPERM;
5163 return(error);
5164 }
5165
5166 error = vnode_setattr(vp, vap, ctx);
5167
5168 return (error);
5169 }
5170
5171
5172 /*
5173 * Change mode of a file given a path name.
5174 *
5175 * Returns: 0 Success
5176 * namei:??? [anything namei can return]
5177 * chmod2:??? [anything chmod2 can return]
5178 */
5179 static int
5180 chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
5181 {
5182 struct nameidata nd;
5183 int error;
5184
5185 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5186 UIO_USERSPACE, path, ctx);
5187 if ((error = namei(&nd)))
5188 return (error);
5189 error = chmod2(ctx, nd.ni_vp, vap);
5190 vnode_put(nd.ni_vp);
5191 nameidone(&nd);
5192 return(error);
5193 }
5194
5195 /*
5196 * chmod_extended: Change the mode of a file given a path name; with extended
5197 * argument list (including extended security (ACL)).
5198 *
5199 * Parameters: p Process requesting the open
5200 * uap User argument descriptor (see below)
5201 * retval (ignored)
5202 *
5203 * Indirect: uap->path Path to object (same as 'chmod')
5204 * uap->uid UID to set
5205 * uap->gid GID to set
5206 * uap->mode File mode to set (same as 'chmod')
5207 * uap->xsecurity ACL to set (or delete)
5208 *
5209 * Returns: 0 Success
5210 * !0 errno value
5211 *
5212 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5213 *
5214 * XXX: We should enumerate the possible errno values here, and where
5215 * in the code they originated.
5216 */
5217 int
5218 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
5219 {
5220 int error;
5221 struct vnode_attr va;
5222 kauth_filesec_t xsecdst;
5223
5224 AUDIT_ARG(owner, uap->uid, uap->gid);
5225
5226 VATTR_INIT(&va);
5227 if (uap->mode != -1)
5228 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5229 if (uap->uid != KAUTH_UID_NONE)
5230 VATTR_SET(&va, va_uid, uap->uid);
5231 if (uap->gid != KAUTH_GID_NONE)
5232 VATTR_SET(&va, va_gid, uap->gid);
5233
5234 xsecdst = NULL;
5235 switch(uap->xsecurity) {
5236 /* explicit remove request */
5237 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5238 VATTR_SET(&va, va_acl, NULL);
5239 break;
5240 /* not being set */
5241 case USER_ADDR_NULL:
5242 break;
5243 default:
5244 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5245 return(error);
5246 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5247 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
5248 }
5249
5250 error = chmod1(vfs_context_current(), uap->path, &va);
5251
5252 if (xsecdst != NULL)
5253 kauth_filesec_free(xsecdst);
5254 return(error);
5255 }
5256
5257 /*
5258 * Returns: 0 Success
5259 * chmod1:??? [anything chmod1 can return]
5260 */
5261 int
5262 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5263 {
5264 struct vnode_attr va;
5265
5266 VATTR_INIT(&va);
5267 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5268
5269 return(chmod1(vfs_context_current(), uap->path, &va));
5270 }
5271
5272 /*
5273 * Change mode of a file given a file descriptor.
5274 */
5275 static int
5276 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
5277 {
5278 vnode_t vp;
5279 int error;
5280
5281 AUDIT_ARG(fd, fd);
5282
5283 if ((error = file_vnode(fd, &vp)) != 0)
5284 return (error);
5285 if ((error = vnode_getwithref(vp)) != 0) {
5286 file_drop(fd);
5287 return(error);
5288 }
5289 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5290
5291 error = chmod2(vfs_context_current(), vp, vap);
5292 (void)vnode_put(vp);
5293 file_drop(fd);
5294
5295 return (error);
5296 }
5297
5298 /*
5299 * fchmod_extended: Change mode of a file given a file descriptor; with
5300 * extended argument list (including extended security (ACL)).
5301 *
5302 * Parameters: p Process requesting to change file mode
5303 * uap User argument descriptor (see below)
5304 * retval (ignored)
5305 *
5306 * Indirect: uap->mode File mode to set (same as 'chmod')
5307 * uap->uid UID to set
5308 * uap->gid GID to set
5309 * uap->xsecurity ACL to set (or delete)
5310 * uap->fd File descriptor of file to change mode
5311 *
5312 * Returns: 0 Success
5313 * !0 errno value
5314 *
5315 */
5316 int
5317 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
5318 {
5319 int error;
5320 struct vnode_attr va;
5321 kauth_filesec_t xsecdst;
5322
5323 AUDIT_ARG(owner, uap->uid, uap->gid);
5324
5325 VATTR_INIT(&va);
5326 if (uap->mode != -1)
5327 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5328 if (uap->uid != KAUTH_UID_NONE)
5329 VATTR_SET(&va, va_uid, uap->uid);
5330 if (uap->gid != KAUTH_GID_NONE)
5331 VATTR_SET(&va, va_gid, uap->gid);
5332
5333 xsecdst = NULL;
5334 switch(uap->xsecurity) {
5335 case USER_ADDR_NULL:
5336 VATTR_SET(&va, va_acl, NULL);
5337 break;
5338 case CAST_USER_ADDR_T(-1):
5339 break;
5340 default:
5341 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5342 return(error);
5343 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5344 }
5345
5346 error = fchmod1(p, uap->fd, &va);
5347
5348
5349 switch(uap->xsecurity) {
5350 case USER_ADDR_NULL:
5351 case CAST_USER_ADDR_T(-1):
5352 break;
5353 default:
5354 if (xsecdst != NULL)
5355 kauth_filesec_free(xsecdst);
5356 }
5357 return(error);
5358 }
5359
5360 int
5361 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
5362 {
5363 struct vnode_attr va;
5364
5365 VATTR_INIT(&va);
5366 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5367
5368 return(fchmod1(p, uap->fd, &va));
5369 }
5370
5371
5372 /*
5373 * Set ownership given a path name.
5374 */
5375 /* ARGSUSED */
5376 static int
5377 chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow)
5378 {
5379 vnode_t vp;
5380 struct vnode_attr va;
5381 int error;
5382 struct nameidata nd;
5383 kauth_action_t action;
5384
5385 AUDIT_ARG(owner, uap->uid, uap->gid);
5386
5387 NDINIT(&nd, LOOKUP, OP_SETATTR,
5388 (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
5389 UIO_USERSPACE, uap->path, ctx);
5390 error = namei(&nd);
5391 if (error)
5392 return (error);
5393 vp = nd.ni_vp;
5394
5395 nameidone(&nd);
5396
5397 VATTR_INIT(&va);
5398 if (uap->uid != VNOVAL)
5399 VATTR_SET(&va, va_uid, uap->uid);
5400 if (uap->gid != VNOVAL)
5401 VATTR_SET(&va, va_gid, uap->gid);
5402
5403 #if CONFIG_MACF
5404 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5405 if (error)
5406 goto out;
5407 #endif
5408
5409 /* preflight and authorize attribute changes */
5410 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5411 goto out;
5412 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5413 goto out;
5414 error = vnode_setattr(vp, &va, ctx);
5415
5416 out:
5417 /*
5418 * EACCES is only allowed from namei(); permissions failure should
5419 * return EPERM, so we need to translate the error code.
5420 */
5421 if (error == EACCES)
5422 error = EPERM;
5423
5424 vnode_put(vp);
5425 return (error);
5426 }
5427
5428 int
5429 chown(__unused proc_t p, struct chown_args *uap, int32_t *retval)
5430 {
5431 return chown1(vfs_context_current(), uap, retval, 1);
5432 }
5433
5434 int
5435 lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval)
5436 {
5437 /* Argument list identical, but machine generated; cast for chown1() */
5438 return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
5439 }
5440
5441 /*
5442 * Set ownership given a file descriptor.
5443 */
5444 /* ARGSUSED */
5445 int
5446 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
5447 {
5448 struct vnode_attr va;
5449 vfs_context_t ctx = vfs_context_current();
5450 vnode_t vp;
5451 int error;
5452 kauth_action_t action;
5453
5454 AUDIT_ARG(owner, uap->uid, uap->gid);
5455 AUDIT_ARG(fd, uap->fd);
5456
5457 if ( (error = file_vnode(uap->fd, &vp)) )
5458 return (error);
5459
5460 if ( (error = vnode_getwithref(vp)) ) {
5461 file_drop(uap->fd);
5462 return(error);
5463 }
5464 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5465
5466 VATTR_INIT(&va);
5467 if (uap->uid != VNOVAL)
5468 VATTR_SET(&va, va_uid, uap->uid);
5469 if (uap->gid != VNOVAL)
5470 VATTR_SET(&va, va_gid, uap->gid);
5471
5472 #if NAMEDSTREAMS
5473 /* chown calls are not allowed for resource forks. */
5474 if (vp->v_flag & VISNAMEDSTREAM) {
5475 error = EPERM;
5476 goto out;
5477 }
5478 #endif
5479
5480 #if CONFIG_MACF
5481 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5482 if (error)
5483 goto out;
5484 #endif
5485
5486 /* preflight and authorize attribute changes */
5487 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5488 goto out;
5489 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5490 if (error == EACCES)
5491 error = EPERM;
5492 goto out;
5493 }
5494 error = vnode_setattr(vp, &va, ctx);
5495
5496 out:
5497 (void)vnode_put(vp);
5498 file_drop(uap->fd);
5499 return (error);
5500 }
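/*
 * User-space sketch, not part of this file: pass -1 (VNOVAL) for the field
 * that should stay unchanged.  An unprivileged caller may only hand a file
 * to a group it belongs to; other attempts come back as EPERM (the EACCES
 * translation is done above).
 */
#if 0	/* example only, never compiled */
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>

static int
give_to_group(const char *path, gid_t gid)
{
	if (chown(path, (uid_t)-1, gid) != 0) {
		perror("chown");
		return -1;
	}
	return 0;
}
#endif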
5501
5502 static int
5503 getutimes(user_addr_t usrtvp, struct timespec *tsp)
5504 {
5505 int error;
5506
5507 if (usrtvp == USER_ADDR_NULL) {
5508 struct timeval old_tv;
5509 /* XXX Y2038 bug because of microtime argument */
5510 microtime(&old_tv);
5511 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
5512 tsp[1] = tsp[0];
5513 } else {
5514 if (IS_64BIT_PROCESS(current_proc())) {
5515 struct user64_timeval tv[2];
5516 error = copyin(usrtvp, (void *)tv, sizeof(tv));
5517 if (error)
5518 return (error);
5519 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5520 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5521 } else {
5522 struct user32_timeval tv[2];
5523 error = copyin(usrtvp, (void *)tv, sizeof(tv));
5524 if (error)
5525 return (error);
5526 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5527 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5528 }
5529 }
5530 return 0;
5531 }
5532
5533 static int
5534 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
5535 int nullflag)
5536 {
5537 int error;
5538 struct vnode_attr va;
5539 kauth_action_t action;
5540
5541 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5542
5543 VATTR_INIT(&va);
5544 VATTR_SET(&va, va_access_time, ts[0]);
5545 VATTR_SET(&va, va_modify_time, ts[1]);
5546 if (nullflag)
5547 va.va_vaflags |= VA_UTIMES_NULL;
5548
5549 #if NAMEDSTREAMS
5550 /* utimes calls are not allowed for resource forks. */
5551 if (vp->v_flag & VISNAMEDSTREAM) {
5552 error = EPERM;
5553 goto out;
5554 }
5555 #endif
5556
5557 #if CONFIG_MACF
5558 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
5559 if (error)
5560 goto out;
5561 #endif
5562 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
5563 if (!nullflag && error == EACCES)
5564 error = EPERM;
5565 goto out;
5566 }
5567
5568 /* since we may not need to auth anything, check here */
5569 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5570 if (!nullflag && error == EACCES)
5571 error = EPERM;
5572 goto out;
5573 }
5574 error = vnode_setattr(vp, &va, ctx);
5575
5576 out:
5577 return error;
5578 }
5579
5580 /*
5581 * Set the access and modification times of a file.
5582 */
5583 /* ARGSUSED */
5584 int
5585 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
5586 {
5587 struct timespec ts[2];
5588 user_addr_t usrtvp;
5589 int error;
5590 struct nameidata nd;
5591 vfs_context_t ctx = vfs_context_current();
5592
5593 /*
5594 * AUDIT: Needed to change the order of operations to do the
5595 * name lookup first because auditing wants the path.
5596 */
5597 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5598 UIO_USERSPACE, uap->path, ctx);
5599 error = namei(&nd);
5600 if (error)
5601 return (error);
5602 nameidone(&nd);
5603
5604 /*
5605 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
5606 * the current time instead.
5607 */
5608 usrtvp = uap->tptr;
5609 if ((error = getutimes(usrtvp, ts)) != 0)
5610 goto out;
5611
5612 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
5613
5614 out:
5615 vnode_put(nd.ni_vp);
5616 return (error);
5617 }
5618
5619 /*
5620 * Set the access and modification times of a file.
5621 */
5622 /* ARGSUSED */
5623 int
5624 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
5625 {
5626 struct timespec ts[2];
5627 vnode_t vp;
5628 user_addr_t usrtvp;
5629 int error;
5630
5631 AUDIT_ARG(fd, uap->fd);
5632 usrtvp = uap->tptr;
5633 if ((error = getutimes(usrtvp, ts)) != 0)
5634 return (error);
5635 if ((error = file_vnode(uap->fd, &vp)) != 0)
5636 return (error);
5637 if((error = vnode_getwithref(vp))) {
5638 file_drop(uap->fd);
5639 return(error);
5640 }
5641
5642 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
5643 vnode_put(vp);
5644 file_drop(uap->fd);
5645 return(error);
5646 }
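/*
 * User-space sketch, not part of this file: a NULL timeval pointer makes
 * getutimes() fall back to the current time and sets VA_UTIMES_NULL, which
 * signals the POSIX rule that "touch to now" needs only write access, while
 * setting explicit times requires ownership (note the EACCES/EPERM handling
 * above).
 */
#if 0	/* example only, never compiled */
#include <sys/time.h>
#include <stdio.h>

static int
touch_now(const char *path)
{
	if (utimes(path, NULL) != 0) {
		perror("utimes");
		return -1;
	}
	return 0;
}
#endif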
5647
5648 /*
5649 * Truncate a file given its path name.
5650 */
5651 /* ARGSUSED */
5652 int
5653 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
5654 {
5655 vnode_t vp;
5656 struct vnode_attr va;
5657 vfs_context_t ctx = vfs_context_current();
5658 int error;
5659 struct nameidata nd;
5660 kauth_action_t action;
5661
5662 if (uap->length < 0)
5663 return(EINVAL);
5664 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
5665 UIO_USERSPACE, uap->path, ctx);
5666 if ((error = namei(&nd)))
5667 return (error);
5668 vp = nd.ni_vp;
5669
5670 nameidone(&nd);
5671
5672 VATTR_INIT(&va);
5673 VATTR_SET(&va, va_data_size, uap->length);
5674
5675 #if CONFIG_MACF
5676 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
5677 if (error)
5678 goto out;
5679 #endif
5680
5681 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5682 goto out;
5683 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5684 goto out;
5685 error = vnode_setattr(vp, &va, ctx);
5686 out:
5687 vnode_put(vp);
5688 return (error);
5689 }
5690
5691 /*
5692 * Truncate a file given a file descriptor.
5693 */
5694 /* ARGSUSED */
5695 int
5696 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
5697 {
5698 vfs_context_t ctx = vfs_context_current();
5699 struct vnode_attr va;
5700 vnode_t vp;
5701 struct fileproc *fp;
5702 int error ;
5703 int fd = uap->fd;
5704
5705 AUDIT_ARG(fd, uap->fd);
5706 if (uap->length < 0)
5707 return(EINVAL);
5708
5709 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
5710 return(error);
5711 }
5712
5713 if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
5714 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
5715 goto out;
5716 }
5717 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
5718 error = EINVAL;
5719 goto out;
5720 }
5721
5722 vp = (vnode_t)fp->f_fglob->fg_data;
5723
5724 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
5725 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5726 error = EINVAL;
5727 goto out;
5728 }
5729
5730 if ((error = vnode_getwithref(vp)) != 0) {
5731 goto out;
5732 }
5733
5734 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5735
5736 #if CONFIG_MACF
5737 error = mac_vnode_check_truncate(ctx,
5738 fp->f_fglob->fg_cred, vp);
5739 if (error) {
5740 (void)vnode_put(vp);
5741 goto out;
5742 }
5743 #endif
5744 VATTR_INIT(&va);
5745 VATTR_SET(&va, va_data_size, uap->length);
5746 error = vnode_setattr(vp, &va, ctx);
5747 (void)vnode_put(vp);
5748 out:
5749 file_drop(fd);
5750 return (error);
5751 }
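/*
 * User-space sketch, not part of this file: ftruncate() needs a descriptor
 * opened for writing (the FWRITE check above fails with EINVAL otherwise),
 * and negative lengths are rejected with EINVAL before anything else.
 */
#if 0	/* example only, never compiled */
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

static int
shrink_to(const char *path, off_t length)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return -1;
	}
	if (ftruncate(fd, length) != 0) {
		perror("ftruncate");
		close(fd);
		return -1;
	}
	return close(fd);
}
#endif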
5752
5753
5754 /*
5755 * Sync an open file with synchronized I/O _file_ integrity completion
5756 */
5757 /* ARGSUSED */
5758 int
5759 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
5760 {
5761 __pthread_testcancel(1);
5762 return(fsync_common(p, uap, MNT_WAIT));
5763 }
5764
5765
5766 /*
5767 * Sync an open file with synchronized I/O _file_ integrity completion
5768 *
5769 * Notes: This is a legacy support function that does not test for
5770 * thread cancellation points.
5771 */
5772 /* ARGSUSED */
5773 int
5774 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
5775 {
5776 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
5777 }
5778
5779
5780 /*
5781 * Sync an open file with synchronized I/O _data_ integrity completion
5782 */
5783 /* ARGSUSED */
5784 int
5785 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
5786 {
5787 __pthread_testcancel(1);
5788 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
5789 }
5790
5791
5792 /*
5793 * fsync_common
5794 *
5795 * Common fsync code to support both synchronized I/O file integrity completion
5796 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
5797 *
5798 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
5799 * will only guarantee that the file data contents are retrievable. If
5800 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which
5801 * additionally requires that metadata unnecessary for retrieving the file
5802 * data contents, such as atime, mtime, ctime, etc., also be committed to
5803 * stable storage.
5804 *
5805 * Parameters: p The process
5806 * uap->fd The descriptor to synchronize
5807 * flags The data integrity flags
5808 *
5809 * Returns: int Success
5810 * fp_getfvp:EBADF Bad file descriptor
5811 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5812 * VNOP_FSYNC:??? unspecified
5813 *
5814 * Notes: We use struct fsync_args because it is a short name, and all
5815 * caller argument structures are otherwise identical.
5816 */
5817 static int
5818 fsync_common(proc_t p, struct fsync_args *uap, int flags)
5819 {
5820 vnode_t vp;
5821 struct fileproc *fp;
5822 vfs_context_t ctx = vfs_context_current();
5823 int error;
5824
5825 AUDIT_ARG(fd, uap->fd);
5826
5827 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
5828 return (error);
5829 if ( (error = vnode_getwithref(vp)) ) {
5830 file_drop(uap->fd);
5831 return(error);
5832 }
5833
5834 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5835
5836 error = VNOP_FSYNC(vp, flags, ctx);
5837
5838 #if NAMEDRSRCFORK
5839 /* Sync resource fork shadow file if necessary. */
5840 if ((error == 0) &&
5841 (vp->v_flag & VISNAMEDSTREAM) &&
5842 (vp->v_parent != NULLVP) &&
5843 vnode_isshadow(vp) &&
5844 (fp->f_flags & FP_WRITTEN)) {
5845 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
5846 }
5847 #endif
5848
5849 (void)vnode_put(vp);
5850 file_drop(uap->fd);
5851 return (error);
5852 }
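/*
 * User-space sketch, not part of this file: fdatasync() reaches this code
 * with MNT_DWAIT (data integrity) and fsync() with MNT_WAIT (file
 * integrity).  Assumption: the libc fdatasync() wrapper is available for
 * this syscall.  Neither call necessarily forces the drive's write cache;
 * the fcntl F_FULLFSYNC request is the usual way to ask for that.
 */
#if 0	/* example only, never compiled */
#include <fcntl.h>
#include <unistd.h>

static int
flush_file(int fd)
{
	if (fdatasync(fd) != 0)			/* data integrity only */
		return -1;
	if (fsync(fd) != 0)			/* data plus metadata */
		return -1;
	if (fcntl(fd, F_FULLFSYNC) == -1)	/* push to stable storage */
		return -1;
	return 0;
}
#endif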
5853
5854 /*
5855 * Duplicate files. Source must be a file, target must be a file or
5856 * must not exist.
5857 *
5858 * XXX Copyfile authorisation checking is woefully inadequate, and will not
5859 * perform inheritance correctly.
5860 */
5861 /* ARGSUSED */
5862 int
5863 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
5864 {
5865 vnode_t tvp, fvp, tdvp, sdvp;
5866 struct nameidata fromnd, tond;
5867 int error;
5868 vfs_context_t ctx = vfs_context_current();
5869
5870 /* Check that the flags are valid. */
5871
5872 if (uap->flags & ~CPF_MASK) {
5873 return(EINVAL);
5874 }
5875
5876 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
5877 UIO_USERSPACE, uap->from, ctx);
5878 if ((error = namei(&fromnd)))
5879 return (error);
5880 fvp = fromnd.ni_vp;
5881
5882 NDINIT(&tond, CREATE, OP_LINK,
5883 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
5884 UIO_USERSPACE, uap->to, ctx);
5885 if ((error = namei(&tond))) {
5886 goto out1;
5887 }
5888 tdvp = tond.ni_dvp;
5889 tvp = tond.ni_vp;
5890
5891 if (tvp != NULL) {
5892 if (!(uap->flags & CPF_OVERWRITE)) {
5893 error = EEXIST;
5894 goto out;
5895 }
5896 }
5897 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
5898 error = EISDIR;
5899 goto out;
5900 }
5901
5902 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5903 goto out;
5904
5905 if (fvp == tdvp)
5906 error = EINVAL;
5907 /*
5908 * If source is the same as the destination (that is the
5909 * same inode number) then there is nothing to do.
5910 * (fixed to have POSIX semantics - CSM 3/2/98)
5911 */
5912 if (fvp == tvp)
5913 error = -1;
5914 if (!error)
5915 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
5916 out:
5917 sdvp = tond.ni_startdir;
5918 /*
5919 * nameidone has to happen before we vnode_put(tdvp)
5920 * since it may need to release the fs_nodelock on the tdvp
5921 */
5922 nameidone(&tond);
5923
5924 if (tvp)
5925 vnode_put(tvp);
5926 vnode_put(tdvp);
5927 vnode_put(sdvp);
5928 out1:
5929 vnode_put(fvp);
5930
5931 if (fromnd.ni_startdir)
5932 vnode_put(fromnd.ni_startdir);
5933 nameidone(&fromnd);
5934
5935 if (error == -1)
5936 return (0);
5937 return (error);
5938 }
5939
5940
5941 /*
5942 * Rename files. Source and destination must either both be directories,
5943 * or both not be directories. If target is a directory, it must be empty.
5944 */
5945 /* ARGSUSED */
5946 int
5947 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
5948 {
5949 vnode_t tvp, tdvp;
5950 vnode_t fvp, fdvp;
5951 struct nameidata *fromnd, *tond;
5952 vfs_context_t ctx = vfs_context_current();
5953 int error;
5954 int do_retry;
5955 int mntrename;
5956 int need_event;
5957 const char *oname = NULL;
5958 char *from_name = NULL, *to_name = NULL;
5959 int from_len=0, to_len=0;
5960 int holding_mntlock;
5961 mount_t locked_mp = NULL;
5962 vnode_t oparent = NULLVP;
5963 #if CONFIG_FSE
5964 fse_info from_finfo, to_finfo;
5965 #endif
5966 int from_truncated=0, to_truncated;
5967 int batched = 0;
5968 struct vnode_attr *fvap, *tvap;
5969 int continuing = 0;
5970 /* carving out a chunk for structs that are too big to be on stack. */
5971 struct {
5972 struct nameidata from_node, to_node;
5973 struct vnode_attr fv_attr, tv_attr;
5974 } * __rename_data;
5975 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
5976 fromnd = &__rename_data->from_node;
5977 tond = &__rename_data->to_node;
5978
5979 holding_mntlock = 0;
5980 do_retry = 0;
5981 retry:
5982 fvp = tvp = NULL;
5983 fdvp = tdvp = NULL;
5984 fvap = tvap = NULL;
5985 mntrename = FALSE;
5986
5987 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
5988 UIO_USERSPACE, uap->from, ctx);
5989 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
5990
5991 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
5992 UIO_USERSPACE, uap->to, ctx);
5993 tond->ni_flag = NAMEI_COMPOUNDRENAME;
5994
5995 continue_lookup:
5996 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
5997 if ( (error = namei(fromnd)) )
5998 goto out1;
5999 fdvp = fromnd->ni_dvp;
6000 fvp = fromnd->ni_vp;
6001
6002 if (fvp && fvp->v_type == VDIR)
6003 tond->ni_cnd.cn_flags |= WILLBEDIR;
6004 }
6005
6006 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6007 if ( (error = namei(tond)) ) {
6008 /*
6009 * Translate error code for rename("dir1", "dir2/.").
6010 */
6011 if (error == EISDIR && fvp->v_type == VDIR)
6012 error = EINVAL;
6013 goto out1;
6014 }
6015 tdvp = tond->ni_dvp;
6016 tvp = tond->ni_vp;
6017 }
6018
6019 batched = vnode_compound_rename_available(fdvp);
6020 if (!fvp) {
6021 /*
6022 * Claim: this check will never reject a valid rename.
6023 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6024 * Suppose fdvp and tdvp are not on the same mount.
6025 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6026 * then you can't move it to within another dir on the same mountpoint.
6027 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6028 *
6029 * If this check passes, then we are safe to pass these vnodes to the same FS.
6030 */
6031 if (fdvp->v_mount != tdvp->v_mount) {
6032 error = EXDEV;
6033 goto out1;
6034 }
6035 goto skipped_lookup;
6036 }
6037
6038 if (!batched) {
6039 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6040 if (error) {
6041 if (error == ENOENT) {
6042 /*
6043 * We encountered a race where after doing the namei, tvp stops
6044 * being valid. If so, simply re-drive the rename call from the
6045 * top.
6046 */
6047 do_retry = 1;
6048 }
6049 goto out1;
6050 }
6051 }
6052
6053 /*
6054 * If the source and destination are the same (i.e. they're
6055 * links to the same vnode) and the target file system is
6056 * case sensitive, then there is nothing to do.
6057 *
6058 * XXX Come back to this.
6059 */
6060 if (fvp == tvp) {
6061 int pathconf_val;
6062
6063 /*
6064 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6065 * then assume that this file system is case sensitive.
6066 */
6067 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6068 pathconf_val != 0) {
6069 goto out1;
6070 }
6071 }
6072
6073 /*
6074 * Allow the renaming of mount points.
6075 * - target must not exist
6076 * - target must reside in the same directory as source
6077 * - union mounts cannot be renamed
6078 * - "/" cannot be renamed
6079 *
6080 * XXX Handle this in VFS after a continued lookup (if we missed
6081 * in the cache to start off)
6082 */
6083 if ((fvp->v_flag & VROOT) &&
6084 (fvp->v_type == VDIR) &&
6085 (tvp == NULL) &&
6086 (fvp->v_mountedhere == NULL) &&
6087 (fdvp == tdvp) &&
6088 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6089 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
6090 vnode_t coveredvp;
6091
6092 /* switch fvp to the covered vnode */
6093 coveredvp = fvp->v_mount->mnt_vnodecovered;
6094 if ( (vnode_getwithref(coveredvp)) ) {
6095 error = ENOENT;
6096 goto out1;
6097 }
6098 vnode_put(fvp);
6099
6100 fvp = coveredvp;
6101 mntrename = TRUE;
6102 }
6103 /*
6104 * Check for cross-device rename.
6105 */
6106 if ((fvp->v_mount != tdvp->v_mount) ||
6107 (tvp && (fvp->v_mount != tvp->v_mount))) {
6108 error = EXDEV;
6109 goto out1;
6110 }
6111
6112 /*
6113 * If source is the same as the destination (that is the
6114 * same inode number) then there is nothing to do...
6115 * EXCEPT if the underlying file system supports case
6116 * insensitivity and is case preserving. In this case
6117 * the file system needs to handle the special case of
6118 * getting the same vnode as target (fvp) and source (tvp).
6119 *
6120 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6121 * and _PC_CASE_PRESERVING can have this exception, and they need to
6122 * handle the special case of getting the same vnode as target and
6123 * source. NOTE: Then the target is unlocked going into vnop_rename,
6124 * so as not to cause locking problems. There is a single reference on tvp.
6125 *
6126 * NOTE - that fvp == tvp also occurs if they are hard linked and
6127 * that correct behaviour then is just to return success without doing
6128 * anything.
6129 *
6130 * XXX filesystem should take care of this itself, perhaps...
6131 */
6132 if (fvp == tvp && fdvp == tdvp) {
6133 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6134 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6135 fromnd->ni_cnd.cn_namelen)) {
6136 goto out1;
6137 }
6138 }
6139
6140 if (holding_mntlock && fvp->v_mount != locked_mp) {
6141 /*
6142 * we're holding a reference and lock
6143 * on locked_mp, but it no longer matches
6144 * what we want to do... so drop our hold
6145 */
6146 mount_unlock_renames(locked_mp);
6147 mount_drop(locked_mp, 0);
6148 holding_mntlock = 0;
6149 }
6150 if (tdvp != fdvp && fvp->v_type == VDIR) {
6151 /*
6152 * serialize renames that re-shape
6153 * the tree... if holding_mntlock is
6154 * set, then we're ready to go...
6155 * otherwise we
6156 * first need to drop the iocounts
6157 * we picked up, second take the
6158 * lock to serialize the access,
6159 * then finally start the lookup
6160 * process over with the lock held
6161 */
6162 if (!holding_mntlock) {
6163 /*
6164 * need to grab a reference on
6165 * the mount point before we
6166 * drop all the iocounts... once
6167 * the iocounts are gone, the mount
6168 * could follow
6169 */
6170 locked_mp = fvp->v_mount;
6171 mount_ref(locked_mp, 0);
6172
6173 /*
6174 * nameidone has to happen before we vnode_put(tvp)
6175 * since it may need to release the fs_nodelock on the tvp
6176 */
6177 nameidone(tond);
6178
6179 if (tvp)
6180 vnode_put(tvp);
6181 vnode_put(tdvp);
6182
6183 /*
6184 * nameidone has to happen before we vnode_put(fdvp)
6185 * since it may need to release the fs_nodelock on the fvp
6186 */
6187 nameidone(fromnd);
6188
6189 vnode_put(fvp);
6190 vnode_put(fdvp);
6191
6192 mount_lock_renames(locked_mp);
6193 holding_mntlock = 1;
6194
6195 goto retry;
6196 }
6197 } else {
6198 /*
6199 * when we dropped the iocounts to take
6200 * the lock, we allowed the identity of
6201 * the various vnodes to change... if they did,
6202 * we may no longer be dealing with a rename
6203 * that reshapes the tree... once we're holding
6204 * the iocounts, the vnodes can't change type
6205 * so we're free to drop the lock at this point
6206 * and continue on
6207 */
6208 if (holding_mntlock) {
6209 mount_unlock_renames(locked_mp);
6210 mount_drop(locked_mp, 0);
6211 holding_mntlock = 0;
6212 }
6213 }
6214
6215 // save these off so we can later verify that fvp is the same
6216 oname = fvp->v_name;
6217 oparent = fvp->v_parent;
6218
6219 skipped_lookup:
6220 #if CONFIG_FSE
6221 need_event = need_fsevent(FSE_RENAME, fdvp);
6222 if (need_event) {
6223 if (fvp) {
6224 get_fse_info(fvp, &from_finfo, ctx);
6225 } else {
6226 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6227 if (error) {
6228 goto out1;
6229 }
6230
6231 fvap = &__rename_data->fv_attr;
6232 }
6233
6234 if (tvp) {
6235 get_fse_info(tvp, &to_finfo, ctx);
6236 } else if (batched) {
6237 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6238 if (error) {
6239 goto out1;
6240 }
6241
6242 tvap = &__rename_data->tv_attr;
6243 }
6244 }
6245 #else
6246 need_event = 0;
6247 #endif /* CONFIG_FSE */
6248
6249 if (need_event || kauth_authorize_fileop_has_listeners()) {
6250 if (from_name == NULL) {
6251 GET_PATH(from_name);
6252 if (from_name == NULL) {
6253 error = ENOMEM;
6254 goto out1;
6255 }
6256 }
6257
6258 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
6259
6260 if (to_name == NULL) {
6261 GET_PATH(to_name);
6262 if (to_name == NULL) {
6263 error = ENOMEM;
6264 goto out1;
6265 }
6266 }
6267
6268 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
6269 }
6270
6271 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6272 tdvp, &tvp, &tond->ni_cnd, tvap,
6273 0, ctx);
6274
6275 if (holding_mntlock) {
6276 /*
6277 * we can drop our serialization
6278 * lock now
6279 */
6280 mount_unlock_renames(locked_mp);
6281 mount_drop(locked_mp, 0);
6282 holding_mntlock = 0;
6283 }
6284 if (error) {
6285 if (error == EKEEPLOOKING) {
6286 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6287 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6288 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6289 }
6290 }
6291
6292 fromnd->ni_vp = fvp;
6293 tond->ni_vp = tvp;
6294
6295 goto continue_lookup;
6296 }
6297
6298 /*
6299 * We may encounter a race in the VNOP where the destination didn't
6300 * exist when we did the namei, but it does by the time we go and
6301 * try to create the entry. In this case, we should re-drive this rename
6302 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6303 * but other filesystems susceptible to this race could return it, too.
6304 */
6305 if (error == ERECYCLE) {
6306 do_retry = 1;
6307 }
6308
6309 goto out1;
6310 }
6311
6312 /* call out to allow 3rd party notification of rename.
6313 * Ignore result of kauth_authorize_fileop call.
6314 */
6315 kauth_authorize_fileop(vfs_context_ucred(ctx),
6316 KAUTH_FILEOP_RENAME,
6317 (uintptr_t)from_name, (uintptr_t)to_name);
6318
6319 #if CONFIG_FSE
6320 if (from_name != NULL && to_name != NULL) {
6321 if (from_truncated || to_truncated) {
6322 // set it here since only the from_finfo gets reported up to user space
6323 from_finfo.mode |= FSE_TRUNCATED_PATH;
6324 }
6325
6326 if (tvap && tvp) {
6327 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
6328 }
6329 if (fvap) {
6330 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
6331 }
6332
6333 if (tvp) {
6334 add_fsevent(FSE_RENAME, ctx,
6335 FSE_ARG_STRING, from_len, from_name,
6336 FSE_ARG_FINFO, &from_finfo,
6337 FSE_ARG_STRING, to_len, to_name,
6338 FSE_ARG_FINFO, &to_finfo,
6339 FSE_ARG_DONE);
6340 } else {
6341 add_fsevent(FSE_RENAME, ctx,
6342 FSE_ARG_STRING, from_len, from_name,
6343 FSE_ARG_FINFO, &from_finfo,
6344 FSE_ARG_STRING, to_len, to_name,
6345 FSE_ARG_DONE);
6346 }
6347 }
6348 #endif /* CONFIG_FSE */
6349
6350 /*
6351 * update filesystem's mount point data
6352 */
6353 if (mntrename) {
6354 char *cp, *pathend, *mpname;
6355 char * tobuf;
6356 struct mount *mp;
6357 int maxlen;
6358 size_t len = 0;
6359
6360 mp = fvp->v_mountedhere;
6361
6362 if (vfs_busy(mp, LK_NOWAIT)) {
6363 error = EBUSY;
6364 goto out1;
6365 }
6366 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
6367
6368 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
6369 if (!error) {
6370 /* find current mount point prefix */
6371 pathend = &mp->mnt_vfsstat.f_mntonname[0];
6372 for (cp = pathend; *cp != '\0'; ++cp) {
6373 if (*cp == '/')
6374 pathend = cp + 1;
6375 }
6376 /* find last component of target name */
6377 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
6378 if (*cp == '/')
6379 mpname = cp + 1;
6380 }
6381 /* append name to prefix */
6382 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
6383 bzero(pathend, maxlen);
6384 strlcpy(pathend, mpname, maxlen);
6385 }
6386 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
6387
6388 vfs_unbusy(mp);
6389 }
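/*
 * Hypothetical example of the mount-point fixup above (the paths are
 * illustrative only): if the covered directory recorded in f_mntonname
 * as "/Volumes/Old" is renamed with a target path of "/Volumes/New",
 * pathend points just past the last '/' in f_mntonname, mpname points at
 * "New" in the copied-in target path, and the strlcpy() leaves
 * f_mntonname as "/Volumes/New".
 */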
6390 /*
6391 * fix up name & parent pointers. note that we first
6392 * check that fvp has the same name/parent pointers it
6393 * had before the rename call... this is a 'weak' check
6394 * at best...
6395 *
6396 * XXX oparent and oname may not be set in the compound vnop case
6397 */
6398 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
6399 int update_flags;
6400
6401 update_flags = VNODE_UPDATE_NAME;
6402
6403 if (fdvp != tdvp)
6404 update_flags |= VNODE_UPDATE_PARENT;
6405
6406 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
6407 }
6408 out1:
6409 if (to_name != NULL) {
6410 RELEASE_PATH(to_name);
6411 to_name = NULL;
6412 }
6413 if (from_name != NULL) {
6414 RELEASE_PATH(from_name);
6415 from_name = NULL;
6416 }
6417 if (holding_mntlock) {
6418 mount_unlock_renames(locked_mp);
6419 mount_drop(locked_mp, 0);
6420 holding_mntlock = 0;
6421 }
6422 if (tdvp) {
6423 /*
6424 * nameidone has to happen before we vnode_put(tdvp)
6425 * since it may need to release the fs_nodelock on the tdvp
6426 */
6427 nameidone(tond);
6428
6429 if (tvp)
6430 vnode_put(tvp);
6431 vnode_put(tdvp);
6432 }
6433 if (fdvp) {
6434 /*
6435 * nameidone has to happen before we vnode_put(fdvp)
6436 * since it may need to release the fs_nodelock on the fdvp
6437 */
6438 nameidone(fromnd);
6439
6440 if (fvp)
6441 vnode_put(fvp);
6442 vnode_put(fdvp);
6443 }
6444
6445
6446 /*
6447 * If things changed after we did the namei, then we will re-drive
6448 * this rename call from the top.
6449 */
6450 if (do_retry) {
6451 do_retry = 0;
6452 goto retry;
6453 }
6454
6455 FREE(__rename_data, M_TEMP);
6456 return (error);
6457 }
6458
6459 /*
6460 * Make a directory file.
6461 *
6462 * Returns: 0 Success
6463 * EEXIST
6464 * namei:???
6465 * vnode_authorize:???
6466 * vn_create:???
6467 */
6468 /* ARGSUSED */
6469 static int
6470 mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
6471 {
6472 vnode_t vp, dvp;
6473 int error;
6474 int update_flags = 0;
6475 int batched;
6476 struct nameidata nd;
6477
6478 AUDIT_ARG(mode, vap->va_mode);
6479 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE,
6480 path, ctx);
6481 nd.ni_cnd.cn_flags |= WILLBEDIR;
6482 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
6483
6484 continue_lookup:
6485 error = namei(&nd);
6486 if (error)
6487 return (error);
6488 dvp = nd.ni_dvp;
6489 vp = nd.ni_vp;
6490
6491 if (vp != NULL) {
6492 error = EEXIST;
6493 goto out;
6494 }
6495
6496 batched = vnode_compound_mkdir_available(dvp);
6497
6498 VATTR_SET(vap, va_type, VDIR);
6499
6500 /*
6501 * XXX
6502 * Don't authorize in VFS for compound VNOP. mkdir -p today assumes that it will
6503 * only get EEXIST or EISDIR for existing path components, and not that it could see
6504 * EACCES/EPERM, so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
6505 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
6506 */
6507 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6508 if (error == EACCES || error == EPERM) {
6509 int error2;
6510
6511 nameidone(&nd);
6512 vnode_put(dvp);
6513 dvp = NULLVP;
6514
6515 /*
6516 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6517 * rather than EACCES if the target exists.
6518 */
6519 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, UIO_USERSPACE,
6520 path, ctx);
6521 error2 = namei(&nd);
6522 if (error2) {
6523 goto out;
6524 } else {
6525 vp = nd.ni_vp;
6526 error = EEXIST;
6527 goto out;
6528 }
6529 }
6530
6531 goto out;
6532 }
6533
6534 /*
6535 * make the directory
6536 */
6537 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6538 if (error == EKEEPLOOKING) {
6539 nd.ni_vp = vp;
6540 goto continue_lookup;
6541 }
6542
6543 goto out;
6544 }
6545
6546 // Make sure the name & parent pointers are hooked up
6547 if (vp->v_name == NULL)
6548 update_flags |= VNODE_UPDATE_NAME;
6549 if (vp->v_parent == NULLVP)
6550 update_flags |= VNODE_UPDATE_PARENT;
6551
6552 if (update_flags)
6553 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
6554
6555 #if CONFIG_FSE
6556 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
6557 #endif
6558
6559 out:
6560 /*
6561 * nameidone has to happen before we vnode_put(dvp)
6562 * since it may need to release the fs_nodelock on the dvp
6563 */
6564 nameidone(&nd);
6565
6566 if (vp)
6567 vnode_put(vp);
6568 if (dvp)
6569 vnode_put(dvp);
6570
6571 return (error);
6572 }
6573
6574 /*
6575 * mkdir_extended: Create a directory; with extended security (ACL).
6576 *
6577 * Parameters: p Process requesting to create the directory
6578 * uap User argument descriptor (see below)
6579 * retval (ignored)
6580 *
6581 * Indirect: uap->path Path of directory to create
6582 * uap->mode Access permissions to set
6583 * uap->xsecurity ACL to set
6584 *
6585 * Returns: 0 Success
6586 * !0 Not success
6587 *
6588 */
6589 int
6590 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
6591 {
6592 int ciferror;
6593 kauth_filesec_t xsecdst;
6594 struct vnode_attr va;
6595
6596 AUDIT_ARG(owner, uap->uid, uap->gid);
6597
6598 xsecdst = NULL;
6599 if ((uap->xsecurity != USER_ADDR_NULL) &&
6600 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
6601 return ciferror;
6602
6603 VATTR_INIT(&va);
6604 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6605 if (xsecdst != NULL)
6606 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6607
6608 ciferror = mkdir1(vfs_context_current(), uap->path, &va);
6609 if (xsecdst != NULL)
6610 kauth_filesec_free(xsecdst);
6611 return ciferror;
6612 }
6613
6614 int
6615 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
6616 {
6617 struct vnode_attr va;
6618
6619 VATTR_INIT(&va);
6620 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6621
6622 return(mkdir1(vfs_context_current(), uap->path, &va));
6623 }
6624
6625 /*
6626 * Remove a directory file.
6627 */
6628 /* ARGSUSED */
6629 int
6630 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
6631 {
6632 vnode_t vp, dvp;
6633 int error;
6634 struct nameidata nd;
6635 char *path = NULL;
6636 int len=0;
6637 int has_listeners = 0;
6638 int need_event = 0;
6639 int truncated = 0;
6640 vfs_context_t ctx = vfs_context_current();
6641 #if CONFIG_FSE
6642 struct vnode_attr va;
6643 #endif /* CONFIG_FSE */
6644 struct vnode_attr *vap = NULL;
6645 int batched;
6646
6647 int restart_flag;
6648
6649 /*
6650 * This loop exists to restart rmdir in the unlikely case that two
6651 * processes are simultaneously trying to remove the same directory
6652 * containing orphaned AppleDouble files.
6653 */
6654 do {
6655 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
6656 UIO_USERSPACE, uap->path, ctx);
6657 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
6658 continue_lookup:
6659 restart_flag = 0;
6660 vap = NULL;
6661
6662 error = namei(&nd);
6663 if (error)
6664 return (error);
6665
6666 dvp = nd.ni_dvp;
6667 vp = nd.ni_vp;
6668
6669 if (vp) {
6670 batched = vnode_compound_rmdir_available(vp);
6671
6672 if (vp->v_flag & VROOT) {
6673 /*
6674 * The root of a mounted filesystem cannot be deleted.
6675 */
6676 error = EBUSY;
6677 goto out;
6678 }
6679
6680 /*
6681 * Removed a check here; we used to abort if vp's vid
6682 * was not the same as what we'd seen the last time around.
6683 * I do not think that check was valid, because if we retry
6684 * and all dirents are gone, the directory could legitimately
6685 * be recycled but still be present in a situation where we would
6686 * have had permission to delete. Therefore, we won't make
6687 * an effort to preserve that check now that we may not have a
6688 * vp here.
6689 */
6690
6691 if (!batched) {
6692 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
6693 if (error) {
6694 goto out;
6695 }
6696 }
6697 } else {
6698 batched = 1;
6699
6700 if (!vnode_compound_rmdir_available(dvp)) {
6701 panic("No error, but no compound rmdir?");
6702 }
6703 }
6704
6705 #if CONFIG_FSE
6706 fse_info finfo;
6707
6708 need_event = need_fsevent(FSE_DELETE, dvp);
6709 if (need_event) {
6710 if (!batched) {
6711 get_fse_info(vp, &finfo, ctx);
6712 } else {
6713 error = vfs_get_notify_attributes(&va);
6714 if (error) {
6715 goto out;
6716 }
6717
6718 vap = &va;
6719 }
6720 }
6721 #endif
6722 has_listeners = kauth_authorize_fileop_has_listeners();
6723 if (need_event || has_listeners) {
6724 if (path == NULL) {
6725 GET_PATH(path);
6726 if (path == NULL) {
6727 error = ENOMEM;
6728 goto out;
6729 }
6730 }
6731
6732 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
6733 #if CONFIG_FSE
6734 if (truncated) {
6735 finfo.mode |= FSE_TRUNCATED_PATH;
6736 }
6737 #endif
6738 }
6739
6740 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
6741 nd.ni_vp = vp;
6742 if (vp == NULLVP) {
6743 /* Couldn't find a vnode */
6744 goto out;
6745 }
6746
6747 if (error == EKEEPLOOKING) {
6748 goto continue_lookup;
6749 }
6750
6751 /*
6752 * Special case to remove orphaned AppleDouble
6753 * files. I don't like putting this in the kernel,
6754 * but carbon does not like putting this in carbon either,
6755 * so here we are.
6756 */
6757 if (error == ENOTEMPTY) {
6758 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
6759 if (error == EBUSY) {
6760 goto out;
6761 }
6762
6763
6764 /*
6765 * Assuming everything went well, we will try the RMDIR again
6766 */
6767 if (!error)
6768 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
6769 }
6770
6771 /*
6772 * Call out to allow 3rd party notification of delete.
6773 * Ignore result of kauth_authorize_fileop call.
6774 */
6775 if (!error) {
6776 if (has_listeners) {
6777 kauth_authorize_fileop(vfs_context_ucred(ctx),
6778 KAUTH_FILEOP_DELETE,
6779 (uintptr_t)vp,
6780 (uintptr_t)path);
6781 }
6782
6783 if (vp->v_flag & VISHARDLINK) {
6784 // see the comment in unlink1() about why we update
6785 // the parent of a hard link when it is removed
6786 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
6787 }
6788
6789 #if CONFIG_FSE
6790 if (need_event) {
6791 if (vap) {
6792 vnode_get_fse_info_from_vap(vp, &finfo, vap);
6793 }
6794 add_fsevent(FSE_DELETE, ctx,
6795 FSE_ARG_STRING, len, path,
6796 FSE_ARG_FINFO, &finfo,
6797 FSE_ARG_DONE);
6798 }
6799 #endif
6800 }
6801
6802 out:
6803 if (path != NULL) {
6804 RELEASE_PATH(path);
6805 path = NULL;
6806 }
6807 /*
6808 * nameidone has to happen before we vnode_put(dvp)
6809 * since it may need to release the fs_nodelock on the dvp
6810 */
6811 nameidone(&nd);
6812 vnode_put(dvp);
6813
6814 if (vp)
6815 vnode_put(vp);
6816
6817 if (restart_flag == 0) {
6818 wakeup_one((caddr_t)vp);
6819 return (error);
6820 }
6821 tsleep(vp, PVFS, "rm AD", 1);
6822
6823 } while (restart_flag != 0);
6824
6825 return (error);
6826
6827 }
6828
6829 /* Get direntry length padded to 8 byte alignment */
6830 #define DIRENT64_LEN(namlen) \
6831 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
6832
6833 static errno_t
6834 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
6835 int *numdirent, vfs_context_t ctxp)
6836 {
6837 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
6838 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
6839 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
6840 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
6841 } else {
6842 size_t bufsize;
6843 void * bufptr;
6844 uio_t auio;
6845 struct direntry entry64;
6846 struct dirent *dep;
6847 int bytesread;
6848 int error;
6849
6850 /*
6851 * Our kernel buffer needs to be smaller since re-packing
6852 * will expand each dirent. The worst case (when the name
6853 * length is 3) corresponds to a struct direntry size of 32
6854 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
6855 * (4-byte aligned). So having a buffer that is 3/8 the size
6856 * will prevent us from reading more than we can pack.
6857 *
6858 * Since this buffer is wired memory, we will limit the
6859 * buffer size to a maximum of 32K. We would really like to
6860 * use 32K in the MIN(), but we use magic number 87371 to
6861 * prevent uio_resid() * 3 / 8 from overflowing.
6862 */
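/*
 * Worked numbers for the ratio above (illustrative, using the figures
 * already stated in the comment): a 3-character name packs into a
 * 12-byte struct dirent but expands to DIRENT64_LEN(3) == 32 bytes, so
 * a staging buffer that is 12/32 == 3/8 of the user's buffer guarantees
 * everything read here can be repacked into the user's buffer.  With the
 * 87371 cap, 3 * 87371 / 8 == 32764, keeping bufsize just under 32K.
 */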
6863 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
6864 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
6865 if (bufptr == NULL) {
6866 return ENOMEM;
6867 }
6868
6869 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
6870 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
6871 auio->uio_offset = uio->uio_offset;
6872
6873 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
6874
6875 dep = (struct dirent *)bufptr;
6876 bytesread = bufsize - uio_resid(auio);
6877
6878 /*
6879 * Convert all the entries and copy them out to user's buffer.
6880 */
6881 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
6882 /* Convert a dirent to a dirent64. */
6883 entry64.d_ino = dep->d_ino;
6884 entry64.d_seekoff = 0;
6885 entry64.d_reclen = DIRENT64_LEN(dep->d_namlen);
6886 entry64.d_namlen = dep->d_namlen;
6887 entry64.d_type = dep->d_type;
6888 bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1);
6889
6890 /* Move to next entry. */
6891 dep = (struct dirent *)((char *)dep + dep->d_reclen);
6892
6893 /* Copy entry64 to user's buffer. */
6894 error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio);
6895 }
6896
6897 /* Update the real offset using the offset we got from VNOP_READDIR. */
6898 if (error == 0) {
6899 uio->uio_offset = auio->uio_offset;
6900 }
6901 uio_free(auio);
6902 FREE(bufptr, M_TEMP);
6903 return (error);
6904 }
6905 }
6906
6907 /*
6908 * Read a block of directory entries in a file system independent format.
6909 */
6910 static int
6911 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
6912 off_t *offset, int flags)
6913 {
6914 vnode_t vp;
6915 struct vfs_context context = *vfs_context_current(); /* local copy */
6916 struct fileproc *fp;
6917 uio_t auio;
6918 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6919 off_t loff;
6920 int error, eofflag, numdirent;
6921 char uio_buf[ UIO_SIZEOF(1) ];
6922
6923 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
6924 if (error) {
6925 return (error);
6926 }
6927 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6928 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6929 error = EBADF;
6930 goto out;
6931 }
6932
6933 #if CONFIG_MACF
6934 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
6935 if (error)
6936 goto out;
6937 #endif
6938 if ( (error = vnode_getwithref(vp)) ) {
6939 goto out;
6940 }
6941 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6942
6943 unionread:
6944 if (vp->v_type != VDIR) {
6945 (void)vnode_put(vp);
6946 error = EINVAL;
6947 goto out;
6948 }
6949
6950 #if CONFIG_MACF
6951 error = mac_vnode_check_readdir(&context, vp);
6952 if (error != 0) {
6953 (void)vnode_put(vp);
6954 goto out;
6955 }
6956 #endif /* MAC */
6957
6958 loff = fp->f_fglob->fg_offset;
6959 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
6960 uio_addiov(auio, bufp, bufsize);
6961
6962 if (flags & VNODE_READDIR_EXTENDED) {
6963 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
6964 fp->f_fglob->fg_offset = uio_offset(auio);
6965 } else {
6966 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
6967 fp->f_fglob->fg_offset = uio_offset(auio);
6968 }
6969 if (error) {
6970 (void)vnode_put(vp);
6971 goto out;
6972 }
6973
6974 if ((user_ssize_t)bufsize == uio_resid(auio)){
6975 if (union_dircheckp) {
6976 error = union_dircheckp(&vp, fp, &context);
6977 if (error == -1)
6978 goto unionread;
6979 if (error)
6980 goto out;
6981 }
6982
6983 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) {
6984 struct vnode *tvp = vp;
6985 vp = vp->v_mount->mnt_vnodecovered;
6986 vnode_getwithref(vp);
6987 vnode_ref(vp);
6988 fp->f_fglob->fg_data = (caddr_t) vp;
6989 fp->f_fglob->fg_offset = 0;
6990 vnode_rele(tvp);
6991 vnode_put(tvp);
6992 goto unionread;
6993 }
6994 }
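/*
 * Illustrative note on the block above (descriptive only): it runs only
 * when VNOP_READDIR consumed nothing (resid unchanged).  For a union
 * mount, hitting EOF at the upper layer's root swaps the fileglob's
 * vnode for the covered (lower) vnode, resets the offset to 0, and jumps
 * back to unionread so the caller sees the lower layer's entries on the
 * same descriptor.
 */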
6995
6996 vnode_put(vp);
6997 if (offset) {
6998 *offset = loff;
6999 }
7000
7001 *bytesread = bufsize - uio_resid(auio);
7002 out:
7003 file_drop(fd);
7004 return (error);
7005 }
7006
7007
7008 int
7009 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
7010 {
7011 off_t offset;
7012 ssize_t bytesread;
7013 int error;
7014
7015 AUDIT_ARG(fd, uap->fd);
7016 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7017
7018 if (error == 0) {
7019 if (proc_is64bit(p)) {
7020 user64_long_t base = (user64_long_t)offset;
7021 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7022 } else {
7023 user32_long_t base = (user32_long_t)offset;
7024 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7025 }
7026 *retval = bytesread;
7027 }
7028 return (error);
7029 }
7030
7031 int
7032 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7033 {
7034 off_t offset;
7035 ssize_t bytesread;
7036 int error;
7037
7038 AUDIT_ARG(fd, uap->fd);
7039 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7040
7041 if (error == 0) {
7042 *retval = bytesread;
7043 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7044 }
7045 return (error);
7046 }
7047
7048
7049 /*
7050 * Set the mode mask for creation of filesystem nodes.
7051 * XXX implement xsecurity
7052 */
7053 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7054 static int
7055 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
7056 {
7057 struct filedesc *fdp;
7058
7059 AUDIT_ARG(mask, newmask);
7060 proc_fdlock(p);
7061 fdp = p->p_fd;
7062 *retval = fdp->fd_cmask;
7063 fdp->fd_cmask = newmask & ALLPERMS;
7064 proc_fdunlock(p);
7065 return (0);
7066 }
7067
7068 /*
7069 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7070 *
7071 * Parameters: p Process requesting to set the umask
7072 * uap User argument descriptor (see below)
7073 * retval umask of the process (parameter p)
7074 *
7075 * Indirect: uap->newmask umask to set
7076 * uap->xsecurity ACL to set
7077 *
7078 * Returns: 0 Success
7079 * !0 Not success
7080 *
7081 */
7082 int
7083 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
7084 {
7085 int ciferror;
7086 kauth_filesec_t xsecdst;
7087
7088 xsecdst = KAUTH_FILESEC_NONE;
7089 if (uap->xsecurity != USER_ADDR_NULL) {
7090 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7091 return ciferror;
7092 } else {
7093 xsecdst = KAUTH_FILESEC_NONE;
7094 }
7095
7096 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7097
7098 if (xsecdst != KAUTH_FILESEC_NONE)
7099 kauth_filesec_free(xsecdst);
7100 return ciferror;
7101 }
7102
7103 int
7104 umask(proc_t p, struct umask_args *uap, int32_t *retval)
7105 {
7106 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7107 }
7108
7109 /*
7110 * Void all references to file by ripping underlying filesystem
7111 * away from vnode.
7112 */
7113 /* ARGSUSED */
7114 int
7115 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
7116 {
7117 vnode_t vp;
7118 struct vnode_attr va;
7119 vfs_context_t ctx = vfs_context_current();
7120 int error;
7121 struct nameidata nd;
7122
7123 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
7124 uap->path, ctx);
7125 error = namei(&nd);
7126 if (error)
7127 return (error);
7128 vp = nd.ni_vp;
7129
7130 nameidone(&nd);
7131
7132 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
7133 error = ENOTSUP;
7134 goto out;
7135 }
7136
7137 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
7138 error = EBUSY;
7139 goto out;
7140 }
7141
7142 #if CONFIG_MACF
7143 error = mac_vnode_check_revoke(ctx, vp);
7144 if (error)
7145 goto out;
7146 #endif
7147
7148 VATTR_INIT(&va);
7149 VATTR_WANTED(&va, va_uid);
7150 if ((error = vnode_getattr(vp, &va, ctx)))
7151 goto out;
7152 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
7153 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
7154 goto out;
7155 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
7156 VNOP_REVOKE(vp, REVOKEALL, ctx);
7157 out:
7158 vnode_put(vp);
7159 return (error);
7160 }
7161
7162
7163 /*
7164 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
7165 * The following system calls are designed to support features
7166 * which are specific to the HFS & HFS Plus volume formats
7167 */
7168
7169
7170 /*
7171 * Obtain attribute information on objects in a directory while enumerating
7172 * the directory. This call does not yet support union mounted directories.
7173 * TO DO:
7174 * 1. Union mounted directories.
7175 */
7176
7177 /* ARGSUSED */
7178 int
7179 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
7180 {
7181 vnode_t vp;
7182 struct fileproc *fp;
7183 uio_t auio = NULL;
7184 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7185 uint32_t count;
7186 uint32_t newstate;
7187 int error, eofflag;
7188 uint32_t loff;
7189 struct attrlist attributelist;
7190 vfs_context_t ctx = vfs_context_current();
7191 int fd = uap->fd;
7192 char uio_buf[ UIO_SIZEOF(1) ];
7193 kauth_action_t action;
7194
7195 AUDIT_ARG(fd, fd);
7196
7197 /* Get the attributes into kernel space */
7198 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
7199 return(error);
7200 }
7201 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
7202 return(error);
7203 }
7204 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
7205 return (error);
7206 }
7207 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7208 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7209 error = EBADF;
7210 goto out;
7211 }
7212
7213
7214 #if CONFIG_MACF
7215 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
7216 fp->f_fglob);
7217 if (error)
7218 goto out;
7219 #endif
7220
7221
7222 if ( (error = vnode_getwithref(vp)) )
7223 goto out;
7224
7225 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7226
7227 if (vp->v_type != VDIR) {
7228 (void)vnode_put(vp);
7229 error = EINVAL;
7230 goto out;
7231 }
7232
7233 #if CONFIG_MACF
7234 error = mac_vnode_check_readdir(ctx, vp);
7235 if (error != 0) {
7236 (void)vnode_put(vp);
7237 goto out;
7238 }
7239 #endif /* MAC */
7240
7241 /* set up the uio structure which will contain the users return buffer */
7242 loff = fp->f_fglob->fg_offset;
7243 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ,
7244 &uio_buf[0], sizeof(uio_buf));
7245 uio_addiov(auio, uap->buffer, uap->buffersize);
7246
7247 /*
7248 * If the only item requested is file names, we can let that past with
7249 * just LIST_DIRECTORY. If they want any other attributes, that means
7250 * they need SEARCH as well.
7251 */
7252 action = KAUTH_VNODE_LIST_DIRECTORY;
7253 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
7254 attributelist.fileattr || attributelist.dirattr)
7255 action |= KAUTH_VNODE_SEARCH;
7256
7257 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
7258
7259 /* Believe it or not, uap->options only has 32-bits of valid
7260 * info, so truncate before extending again */
7261 error = VNOP_READDIRATTR(vp, &attributelist, auio,
7262 count,
7263 (u_long)(uint32_t)uap->options, &newstate, &eofflag,
7264 &count, ctx);
7265 }
7266 (void)vnode_put(vp);
7267
7268 if (error)
7269 goto out;
7270 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
7271
7272 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
7273 goto out;
7274 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
7275 goto out;
7276 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
7277 goto out;
7278
7279 *retval = eofflag; /* similar to getdirentries */
7280 error = 0;
7281 out:
7282 file_drop(fd);
7283 return (error); /* return error earlier, a retval of 0 or 1 now */
7284
7285 } /* end of getdirentryattr system call */
7286
7287 /*
7288 * Exchange data between two files
7289 */
7290
7291 /* ARGSUSED */
7292 int
7293 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
7294 {
7295
7296 struct nameidata fnd, snd;
7297 vfs_context_t ctx = vfs_context_current();
7298 vnode_t fvp;
7299 vnode_t svp;
7300 int error;
7301 u_int32_t nameiflags;
7302 char *fpath = NULL;
7303 char *spath = NULL;
7304 int flen=0, slen=0;
7305 int from_truncated=0, to_truncated=0;
7306 #if CONFIG_FSE
7307 fse_info f_finfo, s_finfo;
7308 #endif
7309
7310 nameiflags = 0;
7311 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7312
7313 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
7314 UIO_USERSPACE, uap->path1, ctx);
7315
7316 error = namei(&fnd);
7317 if (error)
7318 goto out2;
7319
7320 nameidone(&fnd);
7321 fvp = fnd.ni_vp;
7322
7323 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
7324 UIO_USERSPACE, uap->path2, ctx);
7325
7326 error = namei(&snd);
7327 if (error) {
7328 vnode_put(fvp);
7329 goto out2;
7330 }
7331 nameidone(&snd);
7332 svp = snd.ni_vp;
7333
7334 /*
7335 * if the files are the same, return EINVAL
7336 */
7337 if (svp == fvp) {
7338 error = EINVAL;
7339 goto out;
7340 }
7341
7342 /*
7343 * if the files are on different volumes, return an error
7344 */
7345 if (svp->v_mount != fvp->v_mount) {
7346 error = EXDEV;
7347 goto out;
7348 }
7349
7350 /*
7351 * if the two vnodes are not files, return an error.
7352 */
7353 if ( (vnode_isreg(svp) == 0) || (vnode_isreg(fvp) == 0) ) {
7354 error = EINVAL;
7355 goto out;
7356 }
7357
7358
7359 #if CONFIG_MACF
7360 error = mac_vnode_check_exchangedata(ctx,
7361 fvp, svp);
7362 if (error)
7363 goto out;
7364 #endif
7365 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
7366 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
7367 goto out;
7368
7369 if (
7370 #if CONFIG_FSE
7371 need_fsevent(FSE_EXCHANGE, fvp) ||
7372 #endif
7373 kauth_authorize_fileop_has_listeners()) {
7374 GET_PATH(fpath);
7375 GET_PATH(spath);
7376 if (fpath == NULL || spath == NULL) {
7377 error = ENOMEM;
7378 goto out;
7379 }
7380
7381 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
7382 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
7383
7384 #if CONFIG_FSE
7385 get_fse_info(fvp, &f_finfo, ctx);
7386 get_fse_info(svp, &s_finfo, ctx);
7387 if (from_truncated || to_truncated) {
7388 // set it here since only the f_finfo gets reported up to user space
7389 f_finfo.mode |= FSE_TRUNCATED_PATH;
7390 }
7391 #endif
7392 }
7393 /* Ok, make the call */
7394 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
7395
7396 if (error == 0) {
7397 const char *tmpname;
7398
7399 if (fpath != NULL && spath != NULL) {
7400 /* call out to allow 3rd party notification of exchangedata.
7401 * Ignore result of kauth_authorize_fileop call.
7402 */
7403 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
7404 (uintptr_t)fpath, (uintptr_t)spath);
7405 }
7406 name_cache_lock();
7407
7408 tmpname = fvp->v_name;
7409 fvp->v_name = svp->v_name;
7410 svp->v_name = tmpname;
7411
7412 if (fvp->v_parent != svp->v_parent) {
7413 vnode_t tmp;
7414
7415 tmp = fvp->v_parent;
7416 fvp->v_parent = svp->v_parent;
7417 svp->v_parent = tmp;
7418 }
7419 name_cache_unlock();
7420
7421 #if CONFIG_FSE
7422 if (fpath != NULL && spath != NULL) {
7423 add_fsevent(FSE_EXCHANGE, ctx,
7424 FSE_ARG_STRING, flen, fpath,
7425 FSE_ARG_FINFO, &f_finfo,
7426 FSE_ARG_STRING, slen, spath,
7427 FSE_ARG_FINFO, &s_finfo,
7428 FSE_ARG_DONE);
7429 }
7430 #endif
7431 }
7432
7433 out:
7434 if (fpath != NULL)
7435 RELEASE_PATH(fpath);
7436 if (spath != NULL)
7437 RELEASE_PATH(spath);
7438 vnode_put(svp);
7439 vnode_put(fvp);
7440 out2:
7441 return (error);
7442 }
7443
7444 #if CONFIG_SEARCHFS
7445
7446 /* ARGSUSED */
7447
7448 int
7449 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
7450 {
7451 vnode_t vp;
7452 int error=0;
7453 int fserror = 0;
7454 struct nameidata nd;
7455 struct user64_fssearchblock searchblock;
7456 struct searchstate *state;
7457 struct attrlist *returnattrs;
7458 struct timeval timelimit;
7459 void *searchparams1,*searchparams2;
7460 uio_t auio = NULL;
7461 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7462 uint32_t nummatches;
7463 int mallocsize;
7464 uint32_t nameiflags;
7465 vfs_context_t ctx = vfs_context_current();
7466 char uio_buf[ UIO_SIZEOF(1) ];
7467
7468 /* Start by copying in the fsearchblock parameter list */
7469 if (IS_64BIT_PROCESS(p)) {
7470 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
7471 timelimit.tv_sec = searchblock.timelimit.tv_sec;
7472 timelimit.tv_usec = searchblock.timelimit.tv_usec;
7473 }
7474 else {
7475 struct user32_fssearchblock tmp_searchblock;
7476
7477 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
7478 // munge into 64-bit version
7479 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
7480 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
7481 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
7482 searchblock.maxmatches = tmp_searchblock.maxmatches;
7483 /*
7484 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
7485 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
7486 */
7487 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
7488 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
7489 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
7490 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
7491 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
7492 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
7493 searchblock.searchattrs = tmp_searchblock.searchattrs;
7494 }
7495 if (error)
7496 return(error);
7497
7498 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
7499 */
7500 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
7501 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
7502 return(EINVAL);
7503
7504 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
7505 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
7506 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated */
7507 /* block. */
7508
7509 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
7510 sizeof(struct attrlist) + sizeof(struct searchstate);
7511
7512 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
7513
7514 /* Now set up the various pointers to the correct place in our newly allocated memory */
7515
7516 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
7517 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
7518 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
7519
7520 /* Now copy in the stuff given our local variables. */
7521
7522 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
7523 goto freeandexit;
7524
7525 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
7526 goto freeandexit;
7527
7528 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
7529 goto freeandexit;
7530
7531 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
7532 goto freeandexit;
7533
7534
7535 /*
7536 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
7537 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
7538 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
7539 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
7540 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
7541 */
7542
7543 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
7544 attrreference_t* string_ref;
7545 u_int32_t* start_length;
7546 user64_size_t param_length;
7547
7548 /* validate searchparams1 */
7549 param_length = searchblock.sizeofsearchparams1;
7550 /* skip the word that specifies length of the buffer */
7551 start_length= (u_int32_t*) searchparams1;
7552 start_length= start_length+1;
7553 string_ref= (attrreference_t*) start_length;
7554
7555 /* ensure no negative offsets or too big offsets */
7556 if (string_ref->attr_dataoffset < 0 ) {
7557 error = EINVAL;
7558 goto freeandexit;
7559 }
7560 if (string_ref->attr_length > MAXPATHLEN) {
7561 error = EINVAL;
7562 goto freeandexit;
7563 }
7564
7565 /* Check for pointer overflow in the string ref */
7566 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
7567 error = EINVAL;
7568 goto freeandexit;
7569 }
7570
7571 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
7572 error = EINVAL;
7573 goto freeandexit;
7574 }
7575 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
7576 error = EINVAL;
7577 goto freeandexit;
7578 }
7579 }
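/*
 * Illustrative layout assumed by the checks above (the sizes and offsets
 * are hypothetical values): searchparams1 begins with a u_int32_t buffer
 * length, followed by an attrreference_t whose attr_dataoffset and
 * attr_length locate the name bytes relative to the attrreference_t
 * itself.  For sizeofsearchparams1 == 32, an attrreference_t at offset 4
 * with attr_dataoffset == 8 and attr_length == 16 is accepted because
 * 4 + 8 + 16 <= 32; a negative offset, attr_length > MAXPATHLEN, or a
 * name extending past the copied-in buffer is rejected with EINVAL.
 */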
7580
7581 /* set up the uio structure which will contain the users return buffer */
7582 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
7583 &uio_buf[0], sizeof(uio_buf));
7584 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
7585
7586 nameiflags = 0;
7587 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7588 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
7589 UIO_USERSPACE, uap->path, ctx);
7590
7591 error = namei(&nd);
7592 if (error)
7593 goto freeandexit;
7594
7595 nameidone(&nd);
7596 vp = nd.ni_vp;
7597
7598 #if CONFIG_MACF
7599 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
7600 if (error) {
7601 vnode_put(vp);
7602 goto freeandexit;
7603 }
7604 #endif
7605
7606
7607 /*
7608 * If searchblock.maxmatches == 0, then skip the search. This has happened
7609 * before and sometimes the underlying code doesn't deal with it well.
7610 */
7611 if (searchblock.maxmatches == 0) {
7612 nummatches = 0;
7613 goto saveandexit;
7614 }
7615
7616 /*
7617 Alright, we have everything we need, so let's make that call.
7618
7619 We keep special track of the return value from the file system:
7620 EAGAIN is an acceptable error condition that shouldn't keep us
7621 from copying out any results...
7622 */
7623
7624 fserror = VNOP_SEARCHFS(vp,
7625 searchparams1,
7626 searchparams2,
7627 &searchblock.searchattrs,
7628 (u_long)searchblock.maxmatches,
7629 &timelimit,
7630 returnattrs,
7631 &nummatches,
7632 (u_long)uap->scriptcode,
7633 (u_long)uap->options,
7634 auio,
7635 state,
7636 ctx);
7637
7638 saveandexit:
7639
7640 vnode_put(vp);
7641
7642 /* Now copy out the stuff that needs copying out. That means the number of matches and the
7643 search state. Everything was already put into the return buffer by the VNOP call. */
7644
7645 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
7646 goto freeandexit;
7647
7648 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
7649 goto freeandexit;
7650
7651 error = fserror;
7652
7653 freeandexit:
7654
7655 FREE(searchparams1,M_TEMP);
7656
7657 return(error);
7658
7659
7660 } /* end of searchfs system call */
7661
7662 #else /* CONFIG_SEARCHFS */
7663
7664 int
7665 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
7666 {
7667 return (ENOTSUP);
7668 }
7669
7670 #endif /* CONFIG_SEARCHFS */
7671
7672
7673 lck_grp_attr_t * nspace_group_attr;
7674 lck_attr_t * nspace_lock_attr;
7675 lck_grp_t * nspace_mutex_group;
7676
7677 lck_mtx_t nspace_handler_lock;
7678 lck_mtx_t nspace_handler_exclusion_lock;
7679
7680 time_t snapshot_timestamp=0;
7681 int nspace_allow_virtual_devs=0;
7682
7683 void nspace_handler_init(void);
7684
7685 typedef struct nspace_item_info {
7686 struct vnode *vp;
7687 void *arg;
7688 uint64_t op;
7689 uint32_t vid;
7690 uint32_t flags;
7691 uint32_t token;
7692 uint32_t refcount;
7693 } nspace_item_info;
7694
7695 #define MAX_NSPACE_ITEMS 128
7696 nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
7697 uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
7698 uint32_t nspace_token_id=0;
7699 uint32_t nspace_handler_timeout = 15; // seconds
7700
7701 #define NSPACE_ITEM_NEW 0x0001
7702 #define NSPACE_ITEM_PROCESSING 0x0002
7703 #define NSPACE_ITEM_DEAD 0x0004
7704 #define NSPACE_ITEM_CANCELLED 0x0008
7705 #define NSPACE_ITEM_DONE 0x0010
7706 #define NSPACE_ITEM_RESET_TIMER 0x0020
7707
7708 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
7709 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
7710 #define NSPACE_ITEM_TRACK_EVENT 0x0100
7711
7712 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT | NSPACE_ITEM_TRACK_EVENT)
7713
7714 //#pragma optimization_level 0
7715
7716 typedef enum {
7717 NSPACE_HANDLER_NSPACE = 0,
7718 NSPACE_HANDLER_SNAPSHOT = 1,
7719 NSPACE_HANDLER_TRACK = 2,
7720
7721 NSPACE_HANDLER_COUNT,
7722 } nspace_type_t;
7723
7724 typedef struct {
7725 uint64_t handler_tid;
7726 struct proc *handler_proc;
7727 int handler_busy;
7728 } nspace_handler_t;
7729
7730 nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
7731
7732 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
7733 {
7734 switch(nspace_type) {
7735 case NSPACE_HANDLER_NSPACE:
7736 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
7737 case NSPACE_HANDLER_SNAPSHOT:
7738 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
7739 case NSPACE_HANDLER_TRACK:
7740 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_TRACK_EVENT;
7741 default:
7742 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
7743 return 0;
7744 }
7745 }
7746
7747 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
7748 {
7749 switch(nspace_type) {
7750 case NSPACE_HANDLER_NSPACE:
7751 return NSPACE_ITEM_NSPACE_EVENT;
7752 case NSPACE_HANDLER_SNAPSHOT:
7753 return NSPACE_ITEM_SNAPSHOT_EVENT;
7754 case NSPACE_HANDLER_TRACK:
7755 return NSPACE_ITEM_TRACK_EVENT;
7756 default:
7757 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
7758 return 0;
7759 }
7760 }
7761
7762 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
7763 {
7764 switch(nspace_type) {
7765 case NSPACE_HANDLER_NSPACE:
7766 return FREAD | FWRITE | O_EVTONLY;
7767 case NSPACE_HANDLER_SNAPSHOT:
7768 case NSPACE_HANDLER_TRACK:
7769 return FREAD | O_EVTONLY;
7770 default:
7771 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
7772 return 0;
7773 }
7774 }
7775
7776 static inline nspace_type_t nspace_type_for_op(uint64_t op)
7777 {
7778 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
7779 case NAMESPACE_HANDLER_NSPACE_EVENT:
7780 return NSPACE_HANDLER_NSPACE;
7781 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
7782 return NSPACE_HANDLER_SNAPSHOT;
7783 case NAMESPACE_HANDLER_TRACK_EVENT:
7784 return NSPACE_HANDLER_TRACK;
7785 default:
7786 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
7787 return NSPACE_HANDLER_NSPACE;
7788 }
7789 }
7790
7791 static inline int nspace_is_special_process(struct proc *proc)
7792 {
7793 int i;
7794 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
7795 if (proc == nspace_handlers[i].handler_proc)
7796 return 1;
7797 }
7798 return 0;
7799 }
7800
7801 void
7802 nspace_handler_init(void)
7803 {
7804 nspace_lock_attr = lck_attr_alloc_init();
7805 nspace_group_attr = lck_grp_attr_alloc_init();
7806 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
7807 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
7808 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
7809 memset(&nspace_items[0], 0, sizeof(nspace_items));
7810 }
7811
7812 void
7813 nspace_proc_exit(struct proc *p)
7814 {
7815 int i, event_mask = 0;
7816
7817 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
7818 if (p == nspace_handlers[i].handler_proc) {
7819 event_mask |= nspace_item_flags_for_type(i);
7820 nspace_handlers[i].handler_tid = 0;
7821 nspace_handlers[i].handler_proc = NULL;
7822 }
7823 }
7824
7825 if (event_mask == 0) {
7826 return;
7827 }
7828
7829 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
7830 // if this process was the snapshot handler, zero snapshot_timestamp
7831 snapshot_timestamp = 0;
7832 }
7833
7834 //
7835 // unblock anyone that's waiting for the handler that died
7836 //
7837 lck_mtx_lock(&nspace_handler_lock);
7838 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
7839 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
7840
7841 if ( nspace_items[i].flags & event_mask ) {
7842
7843 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
7844 vnode_lock_spin(nspace_items[i].vp);
7845 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
7846 vnode_unlock(nspace_items[i].vp);
7847 }
7848 nspace_items[i].vp = NULL;
7849 nspace_items[i].vid = 0;
7850 nspace_items[i].flags = NSPACE_ITEM_DONE;
7851 nspace_items[i].token = 0;
7852
7853 wakeup((caddr_t)&(nspace_items[i].vp));
7854 }
7855 }
7856 }
7857
7858 wakeup((caddr_t)&nspace_item_idx);
7859 lck_mtx_unlock(&nspace_handler_lock);
7860 }
7861
7862
7863 int
7864 resolve_nspace_item(struct vnode *vp, uint64_t op)
7865 {
7866 return resolve_nspace_item_ext(vp, op, NULL);
7867 }
7868
7869 int
7870 resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
7871 {
7872 int i, error, keep_waiting;
7873 struct timespec ts;
7874 nspace_type_t nspace_type = nspace_type_for_op(op);
7875
7876 // only allow namespace events on regular files, directories and symlinks.
7877 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
7878 return 0;
7879 }
7880
7881 //
7882 // if this is a snapshot event and the vnode is on a
7883 // disk image just pretend nothing happened since any
7884 // change to the disk image will cause the disk image
7885 // itself to get backed up and this avoids multi-way
7886 // deadlocks between the snapshot handler and the ever
7887 // popular diskimages-helper process. the variable
7888 // nspace_allow_virtual_devs allows this behavior to
7889 // be overridden (for use by the Mobile TimeMachine
7890 // testing infrastructure which uses disk images)
7891 //
7892 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
7893 && (vp->v_mount != NULL)
7894 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
7895 && !nspace_allow_virtual_devs) {
7896
7897 return 0;
7898 }
7899
7900 // if (thread_tid(current_thread()) == namespace_handler_tid) {
7901 if (nspace_handlers[nspace_type].handler_proc == NULL) {
7902 return 0;
7903 }
7904
7905 if (nspace_is_special_process(current_proc())) {
7906 return EDEADLK;
7907 }
7908
7909 lck_mtx_lock(&nspace_handler_lock);
7910
7911 retry:
7912 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
7913 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
7914 break;
7915 }
7916 }
7917
7918 if (i >= MAX_NSPACE_ITEMS) {
7919 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
7920 if (nspace_items[i].flags == 0) {
7921 break;
7922 }
7923 }
7924 } else {
7925 nspace_items[i].refcount++;
7926 }
7927
7928 if (i >= MAX_NSPACE_ITEMS) {
7929 ts.tv_sec = nspace_handler_timeout;
7930 ts.tv_nsec = 0;
7931
7932 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
7933 if (error == 0) {
7934 // an entry got free'd up, go see if we can get a slot
7935 goto retry;
7936 } else {
7937 lck_mtx_unlock(&nspace_handler_lock);
7938 return error;
7939 }
7940 }
7941
7942 //
7943 // if it didn't already exist, add it. if it did exist
7944 // we'll get woken up when someone does a wakeup() on
7945 // the slot in the nspace_items table.
7946 //
7947 if (vp != nspace_items[i].vp) {
7948 nspace_items[i].vp = vp;
7949 nspace_items[i].arg = arg;
7950 nspace_items[i].op = op;
7951 nspace_items[i].vid = vnode_vid(vp);
7952 nspace_items[i].flags = NSPACE_ITEM_NEW;
7953 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
7954 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
7955 if (arg) {
7956 vnode_lock_spin(vp);
7957 vp->v_flag |= VNEEDSSNAPSHOT;
7958 vnode_unlock(vp);
7959 }
7960 }
7961
7962 nspace_items[i].token = 0;
7963 nspace_items[i].refcount = 1;
7964
7965 wakeup((caddr_t)&nspace_item_idx);
7966 }
7967
7968 //
7969 // Now go to sleep until the handler does a wakeup on this
7970 // slot in the nspace_items table (or we timeout).
7971 //
7972 keep_waiting = 1;
7973 while(keep_waiting) {
7974 ts.tv_sec = nspace_handler_timeout;
7975 ts.tv_nsec = 0;
7976 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
7977
7978 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
7979 error = 0;
7980 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
7981 error = nspace_items[i].token;
7982 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
7983 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
7984 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
7985 continue;
7986 } else {
7987 error = ETIMEDOUT;
7988 }
7989 } else if (error == 0) {
7990 // hmmm, why did we get woken up?
7991 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
7992 nspace_items[i].token);
7993 }
7994
7995 if (--nspace_items[i].refcount == 0) {
7996 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
7997 nspace_items[i].arg = NULL;
7998 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
7999 nspace_items[i].flags = 0; // this clears it for re-use
8000 }
8001 wakeup(&nspace_token_id);
8002 keep_waiting = 0;
8003 }
8004
8005 lck_mtx_unlock(&nspace_handler_lock);
8006
8007 return error;
8008 }
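/*
 * Summary sketch of the rendezvous implemented above (descriptive only):
 * resolve_nspace_item_ext() claims a slot in nspace_items[], marks it
 * NSPACE_ITEM_NEW, and wakes any handler thread sleeping on
 * &nspace_item_idx; it then sleeps on &nspace_items[i].vp until the
 * handler side marks the slot DONE or CANCELLED and wakes that address,
 * or until nspace_handler_timeout expires (a RESET_TIMER flag from the
 * handler restarts the wait).
 */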
8009
8010
8011 int
8012 get_nspace_item_status(struct vnode *vp, int32_t *status)
8013 {
8014 int i;
8015
8016 lck_mtx_lock(&nspace_handler_lock);
8017 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8018 if (nspace_items[i].vp == vp) {
8019 break;
8020 }
8021 }
8022
8023 if (i >= MAX_NSPACE_ITEMS) {
8024 lck_mtx_unlock(&nspace_handler_lock);
8025 return ENOENT;
8026 }
8027
8028 *status = nspace_items[i].flags;
8029 lck_mtx_unlock(&nspace_handler_lock);
8030 return 0;
8031 }
8032
8033
8034 #if 0
8035 static int
8036 build_volfs_path(struct vnode *vp, char *path, int *len)
8037 {
8038 struct vnode_attr va;
8039 int ret;
8040
8041 VATTR_INIT(&va);
8042 VATTR_WANTED(&va, va_fsid);
8043 VATTR_WANTED(&va, va_fileid);
8044
8045 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
8046 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
8047 ret = -1;
8048 } else {
8049 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
8050 ret = 0;
8051 }
8052
8053 return ret;
8054 }
8055 #endif
8056
8057 //
8058 // Note: this function does NOT check permissions on all of the
8059 // parent directories leading to this vnode. It should only be
8060 // called on behalf of a root process. Otherwise a process may
8061 // get access to a file because the file itself is readable even
8062 // though its parent directories would prevent access.
8063 //
8064 static int
8065 vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
8066 {
8067 int error, action;
8068
8069 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8070 return error;
8071 }
8072
8073 #if CONFIG_MACF
8074 error = mac_vnode_check_open(ctx, vp, fmode);
8075 if (error)
8076 return error;
8077 #endif
8078
8079 /* compute action to be authorized */
8080 action = 0;
8081 if (fmode & FREAD) {
8082 action |= KAUTH_VNODE_READ_DATA;
8083 }
8084 if (fmode & (FWRITE | O_TRUNC)) {
8085 /*
8086 * If we are writing, appending, and not truncating,
8087 * indicate that we are appending so that if the
8088 * UF_APPEND or SF_APPEND bits are set, we do not deny
8089 * the open.
8090 */
8091 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
8092 action |= KAUTH_VNODE_APPEND_DATA;
8093 } else {
8094 action |= KAUTH_VNODE_WRITE_DATA;
8095 }
8096 }
8097
8098 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
8099 return error;
8100
8101
8102 //
8103 // if the vnode is tagged VOPENEVT and the current process
8104 // has the P_CHECKOPENEVT flag set, then we OR in the O_EVTONLY
8105 // flag to the open mode so that this open won't count against
8106 // the vnode when carbon delete() does a vnode_isinuse() to see
8107 // if a file is currently in use. this allows spotlight
8108 // importers to not interfere with carbon apps that depend on
8109 // the no-delete-if-busy semantics of carbon delete().
8110 //
8111 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
8112 fmode |= O_EVTONLY;
8113 }
8114
8115 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
8116 return error;
8117 }
8118 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
8119 VNOP_CLOSE(vp, fmode, ctx);
8120 return error;
8121 }
8122
8123 /* call out to allow 3rd party notification of open.
8124 * Ignore result of kauth_authorize_fileop call.
8125 */
8126 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
8127 (uintptr_t)vp, 0);
8128
8129
8130 return 0;
8131 }
8132
8133 static int
8134 wait_for_namespace_event(namespace_handler_info_ext *nhi, nspace_type_t nspace_type)
8135 {
8136 int i, error=0, unblock=0;
8137 task_t curtask;
8138
8139 lck_mtx_lock(&nspace_handler_exclusion_lock);
8140 if (nspace_handlers[nspace_type].handler_busy) {
8141 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8142 return EBUSY;
8143 }
8144 nspace_handlers[nspace_type].handler_busy = 1;
8145 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8146
8147 /*
8148 * Any process that gets here will be one of the namespace handlers.
8149 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8150 * since deadlocks can occur because the namespace handler may prevent
8151 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8152 * process.
8153 */
8154 curtask = current_task();
8155 bsd_set_dependency_capable (curtask);
8156
8157 lck_mtx_lock(&nspace_handler_lock);
8158 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8159 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
8160 nspace_handlers[nspace_type].handler_proc = current_proc();
8161 }
8162
8163 while (error == 0) {
8164
8165 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8166 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8167 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8168 continue;
8169 }
8170 break;
8171 }
8172 }
8173
8174 if (i < MAX_NSPACE_ITEMS) {
8175 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
8176 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
8177 nspace_items[i].token = ++nspace_token_id;
8178
8179 if (nspace_items[i].vp) {
8180 struct fileproc *fp;
8181 int32_t indx, fmode;
8182 struct proc *p = current_proc();
8183 vfs_context_t ctx = vfs_context_current();
8184
8185 fmode = nspace_open_flags_for_type(nspace_type);
8186
8187 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
8188 if (error) {
8189 unblock = 1;
8190 break;
8191 }
8192 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
8193 if (error) {
8194 unblock = 1;
8195 vnode_put(nspace_items[i].vp);
8196 break;
8197 }
8198
8199 if ((error = falloc(p, &fp, &indx, ctx))) {
8200 vn_close(nspace_items[i].vp, fmode, ctx);
8201 vnode_put(nspace_items[i].vp);
8202 unblock = 1;
8203 break;
8204 }
8205
8206 fp->f_fglob->fg_flag = fmode;
8207 fp->f_fglob->fg_type = DTYPE_VNODE;
8208 fp->f_fglob->fg_ops = &vnops;
8209 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
8210
8211 proc_fdlock(p);
8212 procfdtbl_releasefd(p, indx, NULL);
8213 fp_drop(p, indx, fp, 1);
8214 proc_fdunlock(p);
8215
8216 error = copyout(&nspace_items[i].token, nhi->token, sizeof(uint32_t));
8217 error = copyout(&nspace_items[i].op, nhi->flags, sizeof(uint64_t));
8218 error = copyout(&indx, nhi->fdptr, sizeof(uint32_t));
8219 if (nhi->infoptr) {
8220 uio_t uio = (uio_t)nspace_items[i].arg;
8221 uint64_t u_offset, u_length;
8222
8223 if (uio) {
8224 u_offset = uio_offset(uio);
8225 u_length = uio_resid(uio);
8226 } else {
8227 u_offset = 0;
8228 u_length = 0;
8229 }
8230 if (error == 0) error = copyout(&u_offset, nhi->infoptr, sizeof(uint64_t));
8231 if (error == 0) error = copyout(&u_length, nhi->infoptr+sizeof(uint64_t), sizeof(uint64_t));
8232 }
8233 if (error) {
8234 vn_close(nspace_items[i].vp, fmode, ctx);
8235 fp_free(p, indx, fp);
8236 unblock = 1;
8237 }
8238
8239 vnode_put(nspace_items[i].vp);
8240
8241 break;
8242 } else {
8243 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
8244 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
8245 }
8246
8247 } else {
8248 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
8249 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8250 error = EINVAL;
8251 break;
8252 }
8253
8254 }
8255 }
8256
8257 if (unblock) {
8258 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8259 vnode_lock_spin(nspace_items[i].vp);
8260 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8261 vnode_unlock(nspace_items[i].vp);
8262 }
8263 nspace_items[i].vp = NULL;
8264 nspace_items[i].vid = 0;
8265 nspace_items[i].flags = NSPACE_ITEM_DONE;
8266 nspace_items[i].token = 0;
8267
8268 wakeup((caddr_t)&(nspace_items[i].vp));
8269 }
8270
8271 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
8272 // just go through every snapshot event and unblock it immediately.
8273 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8274 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8275 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8276 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8277 nspace_items[i].vp = NULL;
8278 nspace_items[i].vid = 0;
8279 nspace_items[i].flags = NSPACE_ITEM_DONE;
8280 nspace_items[i].token = 0;
8281
8282 wakeup((caddr_t)&(nspace_items[i].vp));
8283 }
8284 }
8285 }
8286 }
8287 }
8288
8289 lck_mtx_unlock(&nspace_handler_lock);
8290
8291 lck_mtx_lock(&nspace_handler_exclusion_lock);
8292 nspace_handlers[nspace_type].handler_busy = 0;
8293 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8294
8295 return error;
8296 }
8297
8298
8299 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
8300 {
8301 int error = 0;
8302 namespace_handler_info_ext nhi;
8303
8304 if (nspace_type == NSPACE_HANDLER_SNAPSHOT && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8305 return EINVAL;
8306 }
8307
8308 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8309 return error;
8310 }
8311
8312 if ( (is64bit && size != sizeof(user64_namespace_handler_info) && size != sizeof(user64_namespace_handler_info_ext))
8313 || (is64bit == 0 && size != sizeof(user32_namespace_handler_info) && size != sizeof(user32_namespace_handler_info_ext))) {
8314
8315 // either you're 64-bit and passed a 64-bit struct or
8316 // you're 32-bit and passed a 32-bit struct. otherwise
8317 // it's not ok.
8318 return EINVAL;
8319 }
8320
8321 if (is64bit) {
8322 nhi.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
8323 nhi.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
8324 nhi.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
8325 if (size == sizeof(user64_namespace_handler_info_ext)) {
8326 nhi.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
8327 } else {
8328 nhi.infoptr = 0;
8329 }
8330 } else {
8331 nhi.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
8332 nhi.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
8333 nhi.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
8334 if (size == sizeof(user32_namespace_handler_info_ext)) {
8335 nhi.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
8336 } else {
8337 nhi.infoptr = 0;
8338 }
8339 }
8340
8341 return wait_for_namespace_event(&nhi, nspace_type);
8342 }
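/*
 * Illustrative sketch (not part of this file): a privileged user-space
 * handler blocks in wait_for_namespace_event() above by issuing the
 * namespace-handler-get fsctl against the volume it services. Both the
 * command macro spelling (FSIOC_NAMESPACE_HANDLER_GET) and the user-space
 * layout of namespace_handler_info are assumptions here; the layout is taken
 * to mirror the copyouts above (token pointer, op/flags pointer, fd pointer),
 * and the mount path is hypothetical:
 *
 *    uint32_t token;
 *    uint64_t op;
 *    int32_t  fd;
 *    namespace_handler_info nhi = { &token, &op, &fd };
 *
 *    if (fsctl("/Volumes/Example", FSIOC_NAMESPACE_HANDLER_GET, &nhi, 0) == 0) {
 *        // token identifies the event, op describes the blocked operation,
 *        // and fd is an open descriptor for the file involved.
 *    }
 */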
8343
8344 /*
8345 * Make a filesystem-specific control call:
8346 */
8347 /* ARGSUSED */
8348 static int
8349 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
8350 {
8351 int error=0;
8352 boolean_t is64bit;
8353 u_int size;
8354 #define STK_PARAMS 128
8355 char stkbuf[STK_PARAMS];
8356 caddr_t data, memp;
8357 vnode_t vp = *arg_vp;
8358
8359 size = IOCPARM_LEN(cmd);
8360 if (size > IOCPARM_MAX) return (EINVAL);
8361
8362 is64bit = proc_is64bit(p);
8363
8364 memp = NULL;
8365 if (size > sizeof (stkbuf)) {
8366 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
8367 data = memp;
8368 } else {
8369 data = &stkbuf[0];
8370 }
8371
8372 if (cmd & IOC_IN) {
8373 if (size) {
8374 error = copyin(udata, data, size);
8375 if (error) goto FSCtl_Exit;
8376 } else {
8377 if (is64bit) {
8378 *(user_addr_t *)data = udata;
8379 }
8380 else {
8381 *(uint32_t *)data = (uint32_t)udata;
8382 }
8383 }
8384 } else if ((cmd & IOC_OUT) && size) {
8385 /*
8386 * Zero the buffer so the user always
8387 * gets back something deterministic.
8388 */
8389 bzero(data, size);
8390 } else if (cmd & IOC_VOID) {
8391 if (is64bit) {
8392 *(user_addr_t *)data = udata;
8393 }
8394 else {
8395 *(uint32_t *)data = (uint32_t)udata;
8396 }
8397 }
8398
8399 /* Check to see if it's a generic command */
8400 if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) {
8401 mount_t mp = vp->v_mount;
8402 int arg = *(uint32_t*)data;
8403
8404 /* record vid of vp so we can drop it below. */
8405 uint32_t vvid = vp->v_id;
8406
8407 /*
8408 * Then grab mount_iterref so that we can release the vnode.
8409 * Without this, a thread may call vnode_iterate_prepare then
8410 * get into a deadlock because we've never released the root vp
8411 */
8412 error = mount_iterref (mp, 0);
8413 if (error) {
8414 goto FSCtl_Exit;
8415 }
8416 vnode_put(vp);
8417
8418 /* issue the sync for this volume */
8419 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
8420
8421 /*
8422 * Then release the mount_iterref once we're done syncing; it's not
8423 * needed for the VNOP_IOCTL below
8424 */
8425 mount_iterdrop(mp);
8426
8427 if (arg & FSCTL_SYNC_FULLSYNC) {
8428 /* re-obtain vnode iocount on the root vp, if possible */
8429 error = vnode_getwithvid (vp, vvid);
8430 if (error == 0) {
8431 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
8432 vnode_put (vp);
8433 }
8434 }
8435 /* mark the argument VP as having been released */
8436 *arg_vp = NULL;
8437
8438 } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
8439 user_addr_t ext_strings;
8440 uint32_t num_entries;
8441 uint32_t max_width;
8442
8443 if ( (is64bit && size != sizeof(user64_package_ext_info))
8444 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
8445
8446 // either you're 64-bit and passed a 64-bit struct or
8447 // you're 32-bit and passed a 32-bit struct. otherwise
8448 // it's not ok.
8449 error = EINVAL;
8450 goto FSCtl_Exit;
8451 }
8452
8453 if (is64bit) {
8454 ext_strings = ((user64_package_ext_info *)data)->strings;
8455 num_entries = ((user64_package_ext_info *)data)->num_entries;
8456 max_width = ((user64_package_ext_info *)data)->max_width;
8457 } else {
8458 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
8459 num_entries = ((user32_package_ext_info *)data)->num_entries;
8460 max_width = ((user32_package_ext_info *)data)->max_width;
8461 }
8462
8463 error = set_package_extensions_table(ext_strings, num_entries, max_width);
8464
8465 } else if (IOCBASECMD(cmd) == FSCTL_WAIT_FOR_SYNC) {
8466 error = tsleep((caddr_t)&sync_wait_time, PVFS|PCATCH, "sync-wait", 0);
8467 if (error == 0) {
8468 *(uint32_t *)data = (uint32_t)sync_wait_time;
8469 error = 0;
8470 } else {
8471 error *= -1;
8472 }
8473
8474 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GET) {
8475 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
8476 } else if (IOCBASECMD(cmd) == FSCTL_OLD_SNAPSHOT_HANDLER_GET) {
8477 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
8478 } else if (IOCBASECMD(cmd) == FSCTL_SNAPSHOT_HANDLER_GET_EXT) {
8479 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
8480 } else if (IOCBASECMD(cmd) == FSCTL_TRACKED_HANDLER_GET) {
8481 error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data);
8482 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UPDATE) {
8483 uint32_t token, val;
8484 int i;
8485
8486 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8487 goto FSCtl_Exit;
8488 }
8489
8490 if (!nspace_is_special_process(p)) {
8491 error = EINVAL;
8492 goto FSCtl_Exit;
8493 }
8494
8495 token = ((uint32_t *)data)[0];
8496 val = ((uint32_t *)data)[1];
8497
8498 lck_mtx_lock(&nspace_handler_lock);
8499
8500 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8501 if (nspace_items[i].token == token) {
8502 break;
8503 }
8504 }
8505
8506 if (i >= MAX_NSPACE_ITEMS) {
8507 error = ENOENT;
8508 } else {
8509 //
8510 // if this bit is set, when resolve_nspace_item() times out
8511 // it will loop and go back to sleep.
8512 //
8513 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
8514 }
8515
8516 lck_mtx_unlock(&nspace_handler_lock);
8517
8518 if (error) {
8519 printf("nspace-handler-update: did not find token %u\n", token);
8520 }
8521
8522 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UNBLOCK) {
8523 uint32_t token, val;
8524 int i;
8525
8526 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8527 goto FSCtl_Exit;
8528 }
8529
8530 if (!nspace_is_special_process(p)) {
8531 error = EINVAL;
8532 goto FSCtl_Exit;
8533 }
8534
8535 token = ((uint32_t *)data)[0];
8536 val = ((uint32_t *)data)[1];
8537
8538 lck_mtx_lock(&nspace_handler_lock);
8539
8540 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8541 if (nspace_items[i].token == token) {
8542 break;
8543 }
8544 }
8545
8546 if (i >= MAX_NSPACE_ITEMS) {
8547 printf("nspace-handler-unblock: did not find token %u\n", token);
8548 error = ENOENT;
8549 } else {
8550 if (val == 0 && nspace_items[i].vp) {
8551 vnode_lock_spin(nspace_items[i].vp);
8552 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8553 vnode_unlock(nspace_items[i].vp);
8554 }
8555
8556 nspace_items[i].vp = NULL;
8557 nspace_items[i].arg = NULL;
8558 nspace_items[i].op = 0;
8559 nspace_items[i].vid = 0;
8560 nspace_items[i].flags = NSPACE_ITEM_DONE;
8561 nspace_items[i].token = 0;
8562
8563 wakeup((caddr_t)&(nspace_items[i].vp));
8564 }
8565
8566 lck_mtx_unlock(&nspace_handler_lock);
8567
8568 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_CANCEL) {
8569 uint32_t token, val;
8570 int i;
8571
8572 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8573 goto FSCtl_Exit;
8574 }
8575
8576 if (!nspace_is_special_process(p)) {
8577 error = EINVAL;
8578 goto FSCtl_Exit;
8579 }
8580
8581 token = ((uint32_t *)data)[0];
8582 val = ((uint32_t *)data)[1];
8583
8584 lck_mtx_lock(&nspace_handler_lock);
8585
8586 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8587 if (nspace_items[i].token == token) {
8588 break;
8589 }
8590 }
8591
8592 if (i >= MAX_NSPACE_ITEMS) {
8593 printf("nspace-handler-cancel: did not find token %u\n", token);
8594 error = ENOENT;
8595 } else {
8596 if (nspace_items[i].vp) {
8597 vnode_lock_spin(nspace_items[i].vp);
8598 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8599 vnode_unlock(nspace_items[i].vp);
8600 }
8601
8602 nspace_items[i].vp = NULL;
8603 nspace_items[i].arg = NULL;
8604 nspace_items[i].vid = 0;
8605 nspace_items[i].token = val;
8606 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
8607 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
8608
8609 wakeup((caddr_t)&(nspace_items[i].vp));
8610 }
8611
8612 lck_mtx_unlock(&nspace_handler_lock);
8613 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME) {
8614 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8615 goto FSCtl_Exit;
8616 }
8617
8618 // we explicitly do not do the namespace_handler_proc check here
8619
8620 lck_mtx_lock(&nspace_handler_lock);
8621 snapshot_timestamp = ((uint32_t *)data)[0];
8622 wakeup(&nspace_item_idx);
8623 lck_mtx_unlock(&nspace_handler_lock);
8624 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
8625
8626 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS) {
8627 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8628 goto FSCtl_Exit;
8629 }
8630
8631 lck_mtx_lock(&nspace_handler_lock);
8632 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
8633 lck_mtx_unlock(&nspace_handler_lock);
8634 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
8635 nspace_allow_virtual_devs ? "" : " NOT");
8636 error = 0;
8637
8638 } else if (IOCBASECMD(cmd) == FSCTL_SET_FSTYPENAME_OVERRIDE) {
8639 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8640 goto FSCtl_Exit;
8641 }
8642 if (vp->v_mount) {
8643 mount_lock(vp->v_mount);
8644 if (data[0] != 0) {
8645 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
8646 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
8647 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
8648 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
8649 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
8650 }
8651 } else {
8652 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
8653 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
8654 }
8655 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
8656 vp->v_mount->fstypename_override[0] = '\0';
8657 }
8658 mount_unlock(vp->v_mount);
8659 }
8660 } else {
8661 /* Invoke the filesystem-specific code */
8662 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
8663 }
8664
8665
8666 /*
8667 * Copy any data to user, size was
8668 * already set and checked above.
8669 */
8670 if (error == 0 && (cmd & IOC_OUT) && size)
8671 error = copyout(data, udata, size);
8672
8673 FSCtl_Exit:
8674 if (memp) kfree(memp, size);
8675
8676 return error;
8677 }
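/*
 * Illustrative sketch (not part of this file): the copyin/copyout handling
 * above is driven entirely by how a command is encoded with the macros in
 * <sys/ioccom.h>. A hypothetical filesystem-private command pair ('z' and
 * the numbers are made up) might look like:
 *
 *    #include <sys/ioccom.h>
 *
 *    #define MYFS_IOC_SET_LEVEL   _IOW('z', 17, uint32_t)  // IOC_IN: 4 bytes copied in
 *    #define MYFS_IOC_GET_STATS   _IOR('z', 18, uint64_t)  // IOC_OUT: buffer zeroed, 8 bytes copied out
 *
 * IOCPARM_LEN() recovers the embedded size (4 or 8 here) for the copy, and
 * IOCBASECMD() strips the size bits before the command is compared against
 * the generic FSCTL_* selectors or handed to VNOP_IOCTL().
 */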
8678
8679 /* ARGSUSED */
8680 int
8681 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
8682 {
8683 int error;
8684 struct nameidata nd;
8685 u_long nameiflags;
8686 vnode_t vp = NULL;
8687 vfs_context_t ctx = vfs_context_current();
8688
8689 AUDIT_ARG(cmd, uap->cmd);
8690 AUDIT_ARG(value32, uap->options);
8691 /* Get the vnode for the file we are getting info on: */
8692 nameiflags = 0;
8693 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8694 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
8695 UIO_USERSPACE, uap->path, ctx);
8696 if ((error = namei(&nd))) goto done;
8697 vp = nd.ni_vp;
8698 nameidone(&nd);
8699
8700 #if CONFIG_MACF
8701 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
8702 if (error) {
8703 goto done;
8704 }
8705 #endif
8706
8707 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
8708
8709 done:
8710 if (vp)
8711 vnode_put(vp);
8712 return error;
8713 }
8714 /* ARGSUSED */
8715 int
8716 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
8717 {
8718 int error;
8719 vnode_t vp = NULL;
8720 vfs_context_t ctx = vfs_context_current();
8721 int fd = -1;
8722
8723 AUDIT_ARG(fd, uap->fd);
8724 AUDIT_ARG(cmd, uap->cmd);
8725 AUDIT_ARG(value32, uap->options);
8726
8727 /* Get the vnode for the file we are getting info on: */
8728 if ((error = file_vnode(uap->fd, &vp)))
8729 goto done;
8730 fd = uap->fd;
8731 if ((error = vnode_getwithref(vp))) {
8732 goto done;
8733 }
8734
8735 #if CONFIG_MACF
8736 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
8737 if (error) {
8738 goto done;
8739 }
8740 #endif
8741
8742 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
8743
8744 done:
8745 if (fd != -1)
8746 file_drop(fd);
8747
8748 if (vp)
8749 vnode_put(vp);
8750 return error;
8751 }
8752 /* end of fsctl system call */
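/*
 * Illustrative sketch (not part of this file): user space reaches the code
 * above through the fsctl()/ffsctl() wrappers declared in <sys/fsctl.h>.
 * Assuming the user-visible FSIOC_SYNC_VOLUME encoding carries the uint32_t
 * flags word consumed by fsctl_internal(), a blocking sync of a hypothetical
 * volume might look like:
 *
 *    #include <sys/fsctl.h>
 *    #include <stdio.h>
 *
 *    uint32_t arg = FSCTL_SYNC_WAIT;    // wait for the sync to complete
 *    if (fsctl("/Volumes/Example", FSIOC_SYNC_VOLUME, &arg, 0) != 0)
 *        perror("fsctl");
 */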
8753
8754 /*
8755 * An in-kernel sync for power management to call.
8756 */
8757 __private_extern__ int
8758 sync_internal(void)
8759 {
8760 int error;
8761
8762 struct sync_args data;
8763
8764 int retval[2];
8765
8766
8767 error = sync(current_proc(), &data, &retval[0]);
8768
8769
8770 return (error);
8771 } /* end of sync_internal call */
8772
8773
8774 /*
8775 * Retrieve the data of an extended attribute.
8776 */
8777 int
8778 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
8779 {
8780 vnode_t vp;
8781 struct nameidata nd;
8782 char attrname[XATTR_MAXNAMELEN+1];
8783 vfs_context_t ctx = vfs_context_current();
8784 uio_t auio = NULL;
8785 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8786 size_t attrsize = 0;
8787 size_t namelen;
8788 u_int32_t nameiflags;
8789 int error;
8790 char uio_buf[ UIO_SIZEOF(1) ];
8791
8792 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
8793 return (EINVAL);
8794
8795 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
8796 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
8797 if ((error = namei(&nd))) {
8798 return (error);
8799 }
8800 vp = nd.ni_vp;
8801 nameidone(&nd);
8802
8803 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen)) != 0) {
8804 goto out;
8805 }
8806 if (xattr_protected(attrname)) {
8807 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
8808 error = EPERM;
8809 goto out;
8810 }
8811 }
8812 /*
8813 * the specific check for 0xffffffff is a hack to preserve
8814 * binary compatibility in K64 with applications that discovered
8815 * that passing in a buf pointer and a size of -1 resulted in
8816 * just the size of the indicated extended attribute being returned.
8817 * this isn't part of the documented behavior, but because of the
8818 * original implementation's check for "uap->size > 0", this behavior
8819 * was allowed. In K32 that check turned into a signed comparison
8820 * even though uap->size is unsigned... in K64, we blow by that
8821 * check because uap->size is unsigned and doesn't get sign smeared
8822 * in the munger for a 32 bit user app. we also need to add a
8823 * check to limit the maximum size of the buffer being passed in...
8824 * unfortunately, the underlying filesystems seem to just malloc
8825 * the requested size even if the actual extended attribute is tiny.
8826 * because that malloc is for kernel wired memory, we have to put a
8827 * sane limit on it.
8828 *
8829 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
8830 * U64 running on K64 will yield -1 (64 bits wide)
8831 * U32/U64 running on K32 will yield -1 (32 bits wide)
8832 */
8833 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
8834 goto no_uio;
8835
8836 if (uap->value) {
8837 if (uap->size > (size_t)XATTR_MAXSIZE)
8838 uap->size = XATTR_MAXSIZE;
8839
8840 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
8841 &uio_buf[0], sizeof(uio_buf));
8842 uio_addiov(auio, uap->value, uap->size);
8843 }
8844 no_uio:
8845 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
8846 out:
8847 vnode_put(vp);
8848
8849 if (auio) {
8850 *retval = uap->size - uio_resid(auio);
8851 } else {
8852 *retval = (user_ssize_t)attrsize;
8853 }
8854
8855 return (error);
8856 }
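/*
 * Illustrative sketch (not part of this file): from user space the documented
 * way to size a buffer is to pass a NULL value pointer (the size == -1 case
 * above exists only for binary compatibility). Using the <sys/xattr.h>
 * wrappers on a hypothetical path:
 *
 *    #include <sys/xattr.h>
 *    #include <stdlib.h>
 *
 *    const char *path = "/Volumes/Example/file";
 *    ssize_t len = getxattr(path, XATTR_FINDERINFO_NAME, NULL, 0, 0, XATTR_NOFOLLOW);
 *    if (len > 0) {
 *        void *buf = malloc(len);
 *        len = getxattr(path, XATTR_FINDERINFO_NAME, buf, len, 0, XATTR_NOFOLLOW);
 *        free(buf);
 *    }
 */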
8857
8858 /*
8859 * Retrieve the data of an extended attribute.
8860 */
8861 int
8862 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
8863 {
8864 vnode_t vp;
8865 char attrname[XATTR_MAXNAMELEN+1];
8866 uio_t auio = NULL;
8867 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8868 size_t attrsize = 0;
8869 size_t namelen;
8870 int error;
8871 char uio_buf[ UIO_SIZEOF(1) ];
8872
8873 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
8874 return (EINVAL);
8875
8876 if ( (error = file_vnode(uap->fd, &vp)) ) {
8877 return (error);
8878 }
8879 if ( (error = vnode_getwithref(vp)) ) {
8880 file_drop(uap->fd);
8881 return(error);
8882 }
8883 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen)) != 0) {
8884 goto out;
8885 }
8886 if (xattr_protected(attrname)) {
8887 error = EPERM;
8888 goto out;
8889 }
8890 if (uap->value && uap->size > 0) {
8891 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
8892 &uio_buf[0], sizeof(uio_buf));
8893 uio_addiov(auio, uap->value, uap->size);
8894 }
8895
8896 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
8897 out:
8898 (void)vnode_put(vp);
8899 file_drop(uap->fd);
8900
8901 if (auio) {
8902 *retval = uap->size - uio_resid(auio);
8903 } else {
8904 *retval = (user_ssize_t)attrsize;
8905 }
8906 return (error);
8907 }
8908
8909 /*
8910 * Set the data of an extended attribute.
8911 */
8912 int
8913 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
8914 {
8915 vnode_t vp;
8916 struct nameidata nd;
8917 char attrname[XATTR_MAXNAMELEN+1];
8918 vfs_context_t ctx = vfs_context_current();
8919 uio_t auio = NULL;
8920 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8921 size_t namelen;
8922 u_int32_t nameiflags;
8923 int error;
8924 char uio_buf[ UIO_SIZEOF(1) ];
8925
8926 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
8927 return (EINVAL);
8928
8929 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen)) != 0) {
8930 /*
8931 * A name that does not fit in attrname comes back from copyinstr as
8932 * ENAMETOOLONG; any other copyinstr failure (e.g. EFAULT) is
8933 * returned to the caller as-is.
8934 */
8935 return error;
8936 }
8937 if (xattr_protected(attrname))
8938 return(EPERM);
8939 if (uap->size != 0 && uap->value == 0) {
8940 return (EINVAL);
8941 }
8942
8943 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
8944 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
8945 if ((error = namei(&nd))) {
8946 return (error);
8947 }
8948 vp = nd.ni_vp;
8949 nameidone(&nd);
8950
8951 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
8952 &uio_buf[0], sizeof(uio_buf));
8953 uio_addiov(auio, uap->value, uap->size);
8954
8955 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
8956 #if CONFIG_FSE
8957 if (error == 0) {
8958 add_fsevent(FSE_XATTR_MODIFIED, ctx,
8959 FSE_ARG_VNODE, vp,
8960 FSE_ARG_DONE);
8961 }
8962 #endif
8963 vnode_put(vp);
8964 *retval = 0;
8965 return (error);
8966 }
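/*
 * Illustrative sketch (not part of this file): the matching user-space call.
 * The path and attribute name are hypothetical; XATTR_CREATE / XATTR_REPLACE
 * arrive here in uap->options:
 *
 *    #include <sys/xattr.h>
 *    #include <stdio.h>
 *    #include <string.h>
 *
 *    const char *val = "blue";
 *    if (setxattr("/Volumes/Example/file", "org.example.color", val,
 *                 strlen(val), 0, XATTR_CREATE) != 0)
 *        perror("setxattr");
 */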
8967
8968 /*
8969 * Set the data of an extended attribute.
8970 */
8971 int
8972 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
8973 {
8974 vnode_t vp;
8975 char attrname[XATTR_MAXNAMELEN+1];
8976 uio_t auio = NULL;
8977 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8978 size_t namelen;
8979 int error;
8980 char uio_buf[ UIO_SIZEOF(1) ];
8981 #if CONFIG_FSE
8982 vfs_context_t ctx = vfs_context_current();
8983 #endif
8984
8985 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
8986 return (EINVAL);
8987
8988 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen)) != 0) {
8989 return (error);
8990 }
8991 if (xattr_protected(attrname))
8992 return(EPERM);
8993 if (uap->size != 0 && uap->value == 0) {
8994 return (EINVAL);
8995 }
8996 if ( (error = file_vnode(uap->fd, &vp)) ) {
8997 return (error);
8998 }
8999 if ( (error = vnode_getwithref(vp)) ) {
9000 file_drop(uap->fd);
9001 return(error);
9002 }
9003 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9004 &uio_buf[0], sizeof(uio_buf));
9005 uio_addiov(auio, uap->value, uap->size);
9006
9007 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9008 #if CONFIG_FSE
9009 if (error == 0) {
9010 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9011 FSE_ARG_VNODE, vp,
9012 FSE_ARG_DONE);
9013 }
9014 #endif
9015 vnode_put(vp);
9016 file_drop(uap->fd);
9017 *retval = 0;
9018 return (error);
9019 }
9020
9021 /*
9022 * Remove an extended attribute.
9023 * XXX Code duplication here.
9024 */
9025 int
9026 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
9027 {
9028 vnode_t vp;
9029 struct nameidata nd;
9030 char attrname[XATTR_MAXNAMELEN+1];
9031 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9032 vfs_context_t ctx = vfs_context_current();
9033 size_t namelen;
9034 u_int32_t nameiflags;
9035 int error;
9036
9037 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9038 return (EINVAL);
9039
9040 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9041 if (error != 0) {
9042 return (error);
9043 }
9044 if (xattr_protected(attrname))
9045 return(EPERM);
9046 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9047 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
9048 if ((error = namei(&nd))) {
9049 return (error);
9050 }
9051 vp = nd.ni_vp;
9052 nameidone(&nd);
9053
9054 error = vn_removexattr(vp, attrname, uap->options, ctx);
9055 #if CONFIG_FSE
9056 if (error == 0) {
9057 add_fsevent(FSE_XATTR_REMOVED, ctx,
9058 FSE_ARG_VNODE, vp,
9059 FSE_ARG_DONE);
9060 }
9061 #endif
9062 vnode_put(vp);
9063 *retval = 0;
9064 return (error);
9065 }
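/*
 * Illustrative sketch (not part of this file): the corresponding user-space
 * call, with a hypothetical path and attribute name:
 *
 *    #include <sys/xattr.h>
 *    #include <stdio.h>
 *
 *    if (removexattr("/Volumes/Example/file", "org.example.color", XATTR_NOFOLLOW) != 0)
 *        perror("removexattr");
 */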
9066
9067 /*
9068 * Remove an extended attribute.
9069 * XXX Code duplication here.
9070 */
9071 int
9072 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
9073 {
9074 vnode_t vp;
9075 char attrname[XATTR_MAXNAMELEN+1];
9076 size_t namelen;
9077 int error;
9078 #if CONFIG_FSE
9079 vfs_context_t ctx = vfs_context_current();
9080 #endif
9081
9082 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9083 return (EINVAL);
9084
9085 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9086 if (error != 0) {
9087 return (error);
9088 }
9089 if (xattr_protected(attrname))
9090 return(EPERM);
9091 if ( (error = file_vnode(uap->fd, &vp)) ) {
9092 return (error);
9093 }
9094 if ( (error = vnode_getwithref(vp)) ) {
9095 file_drop(uap->fd);
9096 return(error);
9097 }
9098
9099 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
9100 #if CONFIG_FSE
9101 if (error == 0) {
9102 add_fsevent(FSE_XATTR_REMOVED, ctx,
9103 FSE_ARG_VNODE, vp,
9104 FSE_ARG_DONE);
9105 }
9106 #endif
9107 vnode_put(vp);
9108 file_drop(uap->fd);
9109 *retval = 0;
9110 return (error);
9111 }
9112
9113 /*
9114 * Retrieve the list of extended attribute names.
9115 * XXX Code duplication here.
9116 */
9117 int
9118 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
9119 {
9120 vnode_t vp;
9121 struct nameidata nd;
9122 vfs_context_t ctx = vfs_context_current();
9123 uio_t auio = NULL;
9124 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9125 size_t attrsize = 0;
9126 u_int32_t nameiflags;
9127 int error;
9128 char uio_buf[ UIO_SIZEOF(1) ];
9129
9130 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9131 return (EINVAL);
9132
9133 nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
9134 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
9135 if ((error = namei(&nd))) {
9136 return (error);
9137 }
9138 vp = nd.ni_vp;
9139 nameidone(&nd);
9140 if (uap->namebuf != 0 && uap->bufsize > 0) {
9141 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
9142 &uio_buf[0], sizeof(uio_buf));
9143 uio_addiov(auio, uap->namebuf, uap->bufsize);
9144 }
9145
9146 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
9147
9148 vnode_put(vp);
9149 if (auio) {
9150 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
9151 } else {
9152 *retval = (user_ssize_t)attrsize;
9153 }
9154 return (error);
9155 }
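/*
 * Illustrative sketch (not part of this file): the returned buffer is a
 * packed sequence of NUL-terminated attribute names, so user-space callers
 * typically size it with a NULL buffer and then walk it (hypothetical path):
 *
 *    #include <sys/xattr.h>
 *    #include <stdio.h>
 *    #include <stdlib.h>
 *    #include <string.h>
 *
 *    const char *path = "/Volumes/Example/file";
 *    ssize_t len = listxattr(path, NULL, 0, XATTR_NOFOLLOW);
 *    if (len > 0) {
 *        char *names = malloc(len);
 *        len = listxattr(path, names, len, XATTR_NOFOLLOW);
 *        for (char *p = names; p < names + len; p += strlen(p) + 1)
 *            printf("%s\n", p);
 *        free(names);
 *    }
 */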
9156
9157 /*
9158 * Retrieve the list of extended attribute names.
9159 * XXX Code duplication here.
9160 */
9161 int
9162 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
9163 {
9164 vnode_t vp;
9165 uio_t auio = NULL;
9166 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9167 size_t attrsize = 0;
9168 int error;
9169 char uio_buf[ UIO_SIZEOF(1) ];
9170
9171 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9172 return (EINVAL);
9173
9174 if ( (error = file_vnode(uap->fd, &vp)) ) {
9175 return (error);
9176 }
9177 if ( (error = vnode_getwithref(vp)) ) {
9178 file_drop(uap->fd);
9179 return(error);
9180 }
9181 if (uap->namebuf != 0 && uap->bufsize > 0) {
9182 auio = uio_createwithbuffer(1, 0, spacetype,
9183 UIO_READ, &uio_buf[0], sizeof(uio_buf));
9184 uio_addiov(auio, uap->namebuf, uap->bufsize);
9185 }
9186
9187 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
9188
9189 vnode_put(vp);
9190 file_drop(uap->fd);
9191 if (auio) {
9192 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
9193 } else {
9194 *retval = (user_ssize_t)attrsize;
9195 }
9196 return (error);
9197 }
9198
9199 /*
9200 * Obtain the full pathname of a file system object by id.
9201 *
9202 * This is a private SPI used by the File Manager.
9203 */
9204 __private_extern__
9205 int
9206 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
9207 {
9208 vnode_t vp;
9209 struct mount *mp = NULL;
9210 vfs_context_t ctx = vfs_context_current();
9211 fsid_t fsid;
9212 char *realpath;
9213 int bpflags;
9214 int length;
9215 int error;
9216
9217 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
9218 return (error);
9219 }
9220 AUDIT_ARG(value32, fsid.val[0]);
9221 AUDIT_ARG(value64, uap->objid);
9222 /* Restrict output buffer size for now. */
9223 if (uap->bufsize > PAGE_SIZE) {
9224 return (EINVAL);
9225 }
9226 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
9227 if (realpath == NULL) {
9228 return (ENOMEM);
9229 }
9230 /* Find the target mountpoint. */
9231 if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) {
9232 error = ENOTSUP; /* unexpected failure */
9233 goto out;
9234 }
9235 /* Find the target vnode. */
9236 if (uap->objid == 2) {
9237 error = VFS_ROOT(mp, &vp, ctx);
9238 } else {
9239 error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx);
9240 }
9241 vfs_unbusy(mp);
9242 if (error) {
9243 goto out;
9244 }
9245 #if CONFIG_MACF
9246 error = mac_vnode_check_fsgetpath(ctx, vp);
9247 if (error) {
9248 vnode_put(vp);
9249 goto out;
9250 }
9251 #endif
9252 /* Obtain the absolute path to this vnode. */
9253 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
9254 bpflags |= BUILDPATH_CHECK_MOVED;
9255 error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
9256 vnode_put(vp);
9257 if (error) {
9258 goto out;
9259 }
9260 AUDIT_ARG(text, realpath);
9261 error = copyout((caddr_t)realpath, uap->buf, length);
9262
9263 *retval = (user_ssize_t)length; /* may be superseded by error */
9264 out:
9265 if (realpath) {
9266 FREE(realpath, M_TEMP);
9267 }
9268 return (error);
9269 }
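/*
 * Illustrative sketch (not part of this file): conceptually this is the
 * inverse of a lookup: given a volume fsid and an object id it rebuilds an
 * absolute path. Assuming a wrapper with the signature later published as
 * fsgetpath(buf, bufsize, &fsid, objid), a caller that obtained the ids from
 * getattrlist() (ATTR_CMN_FSID / ATTR_CMN_FILEID) might do:
 *
 *    char path[MAXPATHLEN];
 *    fsid_t fsid;        // filled in from ATTR_CMN_FSID
 *    uint64_t objid;     // filled in from ATTR_CMN_FILEID
 *
 *    ssize_t len = fsgetpath(path, sizeof(path), &fsid, objid);
 */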
9270
9271 /*
9272 * Common routine to handle various flavors of statfs data heading out
9273 * to user space.
9274 *
9275 * Returns: 0 Success
9276 * EFAULT
9277 */
9278 static int
9279 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
9280 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
9281 boolean_t partial_copy)
9282 {
9283 int error;
9284 int my_size, copy_size;
9285
9286 if (is_64_bit) {
9287 struct user64_statfs sfs;
9288 my_size = copy_size = sizeof(sfs);
9289 bzero(&sfs, my_size);
9290 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9291 sfs.f_type = mp->mnt_vtable->vfc_typenum;
9292 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
9293 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
9294 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
9295 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
9296 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
9297 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
9298 sfs.f_files = (user64_long_t)sfsp->f_files;
9299 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
9300 sfs.f_fsid = sfsp->f_fsid;
9301 sfs.f_owner = sfsp->f_owner;
9302 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
9303 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
9304 } else {
9305 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
9306 }
9307 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
9308 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
9309
9310 if (partial_copy) {
9311 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
9312 }
9313 error = copyout((caddr_t)&sfs, bufp, copy_size);
9314 }
9315 else {
9316 struct user32_statfs sfs;
9317
9318 my_size = copy_size = sizeof(sfs);
9319 bzero(&sfs, my_size);
9320
9321 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9322 sfs.f_type = mp->mnt_vtable->vfc_typenum;
9323 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
9324
9325 /*
9326 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
9327 * have to fudge the numbers here in that case. We inflate the blocksize in order
9328 * to reflect the filesystem size as best we can.
9329 */
9330 if ((sfsp->f_blocks > INT_MAX)
9331 /* Hack for 4061702. I think the real fix is for Carbon to
9332 * look for some volume capability and not depend on hidden
9333 * semantics agreed between a FS and Carbon.
9334 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
9335 * for Carbon to set the bNoVolumeSizes volume attribute.
9336 * Without this, webdavfs files cannot be copied onto
9337 * disk because they look huge. This change should not affect
9338 * XSAN, which should not be setting these to -1.
9339 */
9340 && (sfsp->f_blocks != 0xffffffffffffffffULL)
9341 && (sfsp->f_bfree != 0xffffffffffffffffULL)
9342 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
9343 int shift;
9344
9345 /*
9346 * Work out how far we have to shift the block count down to make it fit.
9347 * Note that it's possible to have to shift so far that the resulting
9348 * blocksize would be unreportably large. At that point, we will clip
9349 * any values that don't fit.
9350 *
9351 * For safety's sake, we also ensure that f_iosize is never reported as
9352 * being smaller than f_bsize.
9353 */
9354 for (shift = 0; shift < 32; shift++) {
9355 if ((sfsp->f_blocks >> shift) <= INT_MAX)
9356 break;
9357 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
9358 break;
9359 }
9360 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
9361 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
9362 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
9363 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
9364 #undef __SHIFT_OR_CLIP
9365 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
9366 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
9367 } else {
9368 /* filesystem is small enough to be reported honestly */
9369 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
9370 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
9371 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
9372 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
9373 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
9374 }
9375 sfs.f_files = (user32_long_t)sfsp->f_files;
9376 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
9377 sfs.f_fsid = sfsp->f_fsid;
9378 sfs.f_owner = sfsp->f_owner;
9379 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
9380 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
9381 } else {
9382 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
9383 }
9384 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
9385 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
9386
9387 if (partial_copy) {
9388 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
9389 }
9390 error = copyout((caddr_t)&sfs, bufp, copy_size);
9391 }
9392
9393 if (sizep != NULL) {
9394 *sizep = my_size;
9395 }
9396 return(error);
9397 }
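/*
 * Worked example for the 32-bit overflow handling above (illustrative only):
 * a 24 TiB volume with 4 KiB blocks reports f_blocks = 6 * 2^30, which does
 * not fit in an int. The shift loop stops at shift == 2, so a 32-bit caller
 * sees f_bsize = 16 KiB and f_blocks = 1.5 * 2^30, a representable pair
 * whose product is still the true 24 TiB capacity.
 */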
9398
9399 /*
9400 * copy stat structure into user_stat structure.
9401 */
9402 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
9403 {
9404 bzero(usbp, sizeof(*usbp));
9405
9406 usbp->st_dev = sbp->st_dev;
9407 usbp->st_ino = sbp->st_ino;
9408 usbp->st_mode = sbp->st_mode;
9409 usbp->st_nlink = sbp->st_nlink;
9410 usbp->st_uid = sbp->st_uid;
9411 usbp->st_gid = sbp->st_gid;
9412 usbp->st_rdev = sbp->st_rdev;
9413 #ifndef _POSIX_C_SOURCE
9414 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9415 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9416 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9417 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9418 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9419 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9420 #else
9421 usbp->st_atime = sbp->st_atime;
9422 usbp->st_atimensec = sbp->st_atimensec;
9423 usbp->st_mtime = sbp->st_mtime;
9424 usbp->st_mtimensec = sbp->st_mtimensec;
9425 usbp->st_ctime = sbp->st_ctime;
9426 usbp->st_ctimensec = sbp->st_ctimensec;
9427 #endif
9428 usbp->st_size = sbp->st_size;
9429 usbp->st_blocks = sbp->st_blocks;
9430 usbp->st_blksize = sbp->st_blksize;
9431 usbp->st_flags = sbp->st_flags;
9432 usbp->st_gen = sbp->st_gen;
9433 usbp->st_lspare = sbp->st_lspare;
9434 usbp->st_qspare[0] = sbp->st_qspare[0];
9435 usbp->st_qspare[1] = sbp->st_qspare[1];
9436 }
9437
9438 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
9439 {
9440 bzero(usbp, sizeof(*usbp));
9441
9442 usbp->st_dev = sbp->st_dev;
9443 usbp->st_ino = sbp->st_ino;
9444 usbp->st_mode = sbp->st_mode;
9445 usbp->st_nlink = sbp->st_nlink;
9446 usbp->st_uid = sbp->st_uid;
9447 usbp->st_gid = sbp->st_gid;
9448 usbp->st_rdev = sbp->st_rdev;
9449 #ifndef _POSIX_C_SOURCE
9450 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9451 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9452 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9453 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9454 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9455 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9456 #else
9457 usbp->st_atime = sbp->st_atime;
9458 usbp->st_atimensec = sbp->st_atimensec;
9459 usbp->st_mtime = sbp->st_mtime;
9460 usbp->st_mtimensec = sbp->st_mtimensec;
9461 usbp->st_ctime = sbp->st_ctime;
9462 usbp->st_ctimensec = sbp->st_ctimensec;
9463 #endif
9464 usbp->st_size = sbp->st_size;
9465 usbp->st_blocks = sbp->st_blocks;
9466 usbp->st_blksize = sbp->st_blksize;
9467 usbp->st_flags = sbp->st_flags;
9468 usbp->st_gen = sbp->st_gen;
9469 usbp->st_lspare = sbp->st_lspare;
9470 usbp->st_qspare[0] = sbp->st_qspare[0];
9471 usbp->st_qspare[1] = sbp->st_qspare[1];
9472 }
9473
9474 /*
9475 * copy stat64 structure into user_stat64 structure.
9476 */
9477 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
9478 {
9479 bzero(usbp, sizeof(*usbp));
9480
9481 usbp->st_dev = sbp->st_dev;
9482 usbp->st_ino = sbp->st_ino;
9483 usbp->st_mode = sbp->st_mode;
9484 usbp->st_nlink = sbp->st_nlink;
9485 usbp->st_uid = sbp->st_uid;
9486 usbp->st_gid = sbp->st_gid;
9487 usbp->st_rdev = sbp->st_rdev;
9488 #ifndef _POSIX_C_SOURCE
9489 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9490 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9491 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9492 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9493 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9494 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9495 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
9496 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
9497 #else
9498 usbp->st_atime = sbp->st_atime;
9499 usbp->st_atimensec = sbp->st_atimensec;
9500 usbp->st_mtime = sbp->st_mtime;
9501 usbp->st_mtimensec = sbp->st_mtimensec;
9502 usbp->st_ctime = sbp->st_ctime;
9503 usbp->st_ctimensec = sbp->st_ctimensec;
9504 usbp->st_birthtime = sbp->st_birthtime;
9505 usbp->st_birthtimensec = sbp->st_birthtimensec;
9506 #endif
9507 usbp->st_size = sbp->st_size;
9508 usbp->st_blocks = sbp->st_blocks;
9509 usbp->st_blksize = sbp->st_blksize;
9510 usbp->st_flags = sbp->st_flags;
9511 usbp->st_gen = sbp->st_gen;
9512 usbp->st_lspare = sbp->st_lspare;
9513 usbp->st_qspare[0] = sbp->st_qspare[0];
9514 usbp->st_qspare[1] = sbp->st_qspare[1];
9515 }
9516
9517 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
9518 {
9519 bzero(usbp, sizeof(*usbp));
9520
9521 usbp->st_dev = sbp->st_dev;
9522 usbp->st_ino = sbp->st_ino;
9523 usbp->st_mode = sbp->st_mode;
9524 usbp->st_nlink = sbp->st_nlink;
9525 usbp->st_uid = sbp->st_uid;
9526 usbp->st_gid = sbp->st_gid;
9527 usbp->st_rdev = sbp->st_rdev;
9528 #ifndef _POSIX_C_SOURCE
9529 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9530 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9531 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9532 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9533 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9534 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9535 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
9536 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
9537 #else
9538 usbp->st_atime = sbp->st_atime;
9539 usbp->st_atimensec = sbp->st_atimensec;
9540 usbp->st_mtime = sbp->st_mtime;
9541 usbp->st_mtimensec = sbp->st_mtimensec;
9542 usbp->st_ctime = sbp->st_ctime;
9543 usbp->st_ctimensec = sbp->st_ctimensec;
9544 usbp->st_birthtime = sbp->st_birthtime;
9545 usbp->st_birthtimensec = sbp->st_birthtimensec;
9546 #endif
9547 usbp->st_size = sbp->st_size;
9548 usbp->st_blocks = sbp->st_blocks;
9549 usbp->st_blksize = sbp->st_blksize;
9550 usbp->st_flags = sbp->st_flags;
9551 usbp->st_gen = sbp->st_gen;
9552 usbp->st_lspare = sbp->st_lspare;
9553 usbp->st_qspare[0] = sbp->st_qspare[0];
9554 usbp->st_qspare[1] = sbp->st_qspare[1];
9555 }