bsd/vfs/vfs_syscalls.c

   1 /*
   2  * Copyright (c) 1995-2010 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1989, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  * (c) UNIX System Laboratories, Inc.
  32  * All or some portions of this file are derived from material licensed
  33  * to the University of California by American Telephone and Telegraph
  34  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  35  * the permission of UNIX System Laboratories, Inc.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. All advertising materials mentioning features or use of this software
  46  *    must display the following acknowledgement:
  47  *      This product includes software developed by the University of
  48  *      California, Berkeley and its contributors.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  *
  65  *      @(#)vfs_syscalls.c      8.41 (Berkeley) 6/15/95
  66  */
  67 /*
  68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  69  * support for mandatory and extensible security protections.  This notice
  70  * is included in support of clause 2.2 (b) of the Apple Public License,
  71  * Version 2.0.
  72  */
  73
  74 #include <sys/param.h>
  75 #include <sys/systm.h>
  76 #include <sys/namei.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/kernel.h>
  79 #include <sys/file_internal.h>
  80 #include <sys/stat.h>
  81 #include <sys/vnode_internal.h>
  82 #include <sys/mount_internal.h>
  83 #include <sys/proc_internal.h>
  84 #include <sys/kauth.h>
  85 #include <sys/uio_internal.h>
  86 #include <sys/malloc.h>
  87 #include <sys/mman.h>
  88 #include <sys/dirent.h>
  89 #include <sys/attr.h>
  90 #include <sys/sysctl.h>
  91 #include <sys/ubc.h>
  92 #include <sys/quota.h>
  93 #include <sys/kdebug.h>
  94 #include <sys/fsevents.h>
  95 #include <sys/imgsrc.h>
  96 #include <sys/sysproto.h>
  97 #include <sys/xattr.h>
  98 #include <sys/fcntl.h>
  99 #include <sys/fsctl.h>
 100 #include <sys/ubc_internal.h>
 101 #include <sys/disk.h>
 102 #include <machine/cons.h>
 103 #include <machine/limits.h>
 104 #include <miscfs/specfs/specdev.h>
 105
 106 #include <security/audit/audit.h>
 107 #include <bsm/audit_kevents.h>
 108
 109 #include <mach/mach_types.h>
 110 #include <kern/kern_types.h>
 111 #include <kern/kalloc.h>
 112 #include <kern/task.h>
 113
 114 #include <vm/vm_pageout.h>
 115
 116 #include <libkern/OSAtomic.h>
 117 #include <pexpert/pexpert.h>
 118
 119 #if CONFIG_MACF
 120 #include <security/mac.h>
 121 #include <security/mac_framework.h>
 122 #endif
 123
 124 #if CONFIG_FSE
 125 #define GET_PATH(x) \
 126         (x) = get_pathbuff();
 127 #define RELEASE_PATH(x) \
 128         release_pathbuff(x);
 129 #else
 130 #define GET_PATH(x)     \
 131         MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
 132 #define RELEASE_PATH(x) \
 133         FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
 134 #endif /* CONFIG_FSE */
 135
 136 /* struct for checkdirs iteration */
 137 struct cdirargs {
 138         vnode_t olddp;
 139         vnode_t newdp;
 140 };
 141 /* callback  for checkdirs iteration */
 142 static int checkdirs_callback(proc_t p, void * arg);
 143
 144 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
 145 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
 146 void enablequotas(struct mount *mp, vfs_context_t ctx);
 147 static int getfsstat_callback(mount_t mp, void * arg);
 148 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
 149 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
 150 static int sync_callback(mount_t, void *);
 151 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
 152                         user_addr_t bufp, int *sizep, boolean_t is_64_bit,
 153                                                 boolean_t partial_copy);
 154 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
 155                         user_addr_t bufp);
 156 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
 157 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
 158                         struct componentname *cnp, user_addr_t fsmountargs,
 159                         int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
 160                         vfs_context_t ctx);
 161 void vfs_notify_mount(vnode_t pdvp);
 162
 163 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
 164
 165 #ifdef CONFIG_IMGSRC_ACCESS
 166 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
 167 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
 168 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
 169 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
 170 static void mount_end_update(mount_t mp);
 171 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
 172 #endif /* CONFIG_IMGSRC_ACCESS */
 173
 174 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
 175
 176 __private_extern__
 177 int sync_internal(void);
 178
 179 __private_extern__
 180 int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *);
 181
 182 __private_extern__
 183 int unlink1(vfs_context_t, struct nameidata *, int);
 184
 185
 186 #ifdef __APPLE_API_OBSOLETE
 187 struct fstatv_args {
 188        int fd;                  /* file descriptor of the target file */
 189        struct vstat *vsb;       /* vstat structure for returned info  */
 190 };
 191 struct lstatv_args {
 192        const char *path;        /* pathname of the target file       */
 193        struct vstat *vsb;       /* vstat structure for returned info */
 194 };
 195 struct mkcomplex_args {
 196         const char *path;       /* pathname of the file to be created */
 197                 mode_t mode;            /* access mode for the newly created file */
 198         u_int32_t type;         /* format of the complex file */
 199 };
 200 struct statv_args {
 201         const char *path;       /* pathname of the target file       */
 202         struct vstat *vsb;      /* vstat structure for returned info */
 203 };
 204
 205 int fstatv(proc_t p, struct fstatv_args *uap, int32_t *retval);
 206 int lstatv(proc_t p, struct lstatv_args *uap, int32_t *retval);
 207 int mkcomplex(proc_t p, struct mkcomplex_args *uap, int32_t *retval);
 208 int statv(proc_t p, struct statv_args *uap, int32_t *retval);
 209
 210 #endif /* __APPLE_API_OBSOLETE */
 211
 212 /*
 213  * incremented each time a mount or unmount operation occurs
 214  * used to invalidate the cached value of the rootvp in the
 215  * mount structure utilized by cache_lookup_path
 216  */
 217 uint32_t mount_generation = 0;
 218
 219 /* counts number of mount and unmount operations */
 220 unsigned int vfs_nummntops=0;
 221
 222 extern struct fileops vnops;
 223 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
 224
 225
 226 /*
 227  * Virtual File System System Calls
 228  */
 229
 230 #if NFSCLIENT
 231 /*
 232  * Private in-kernel mounting spi (NFS only, not exported)
 233  */
 234  __private_extern__
 235 boolean_t
 236 vfs_iskernelmount(mount_t mp)
 237 {
 238         return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
 239 }
 240
 241  __private_extern__
 242 int
 243 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
 244              void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
 245 {
 246         struct nameidata nd;
 247         boolean_t did_namei;
 248         int error;
 249
 250         NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
 251                UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
 252
 253         /*
 254          * Get the vnode to be covered if it's not supplied
 255          */
 256         if (vp == NULLVP) {
 257                 error = namei(&nd);
 258                 if (error)
 259                         return (error);
 260                 vp = nd.ni_vp;
 261                 pvp = nd.ni_dvp;
 262                 did_namei = TRUE;
 263         } else {
 264                 char *pnbuf = CAST_DOWN(char *, path);
 265
 266                 nd.ni_cnd.cn_pnbuf = pnbuf;
 267                 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
 268                 did_namei = FALSE;
 269         }
 270
 271         error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
 272                              syscall_flags, kern_flags, NULL, TRUE, ctx);
 273
 274         if (did_namei) {
 275                 vnode_put(vp);
 276                 vnode_put(pvp);
 277                 nameidone(&nd);
 278         }
 279
 280         return (error);
 281 }
 282 #endif /* NFSCLIENT */
 283
 284 /*
 285  * Mount a file system.
 286  */
 287 /* ARGSUSED */
 288 int
 289 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
 290 {
 291         struct __mac_mount_args muap;
 292
 293         muap.type = uap->type;
 294         muap.path = uap->path;
 295         muap.flags = uap->flags;
 296         muap.data = uap->data;
 297         muap.mac_p = USER_ADDR_NULL;
 298         return (__mac_mount(p, &muap, retval));
 299 }
 300
 301 void
 302 vfs_notify_mount(vnode_t pdvp)
 303 {
 304         vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
 305         lock_vnode_and_post(pdvp, NOTE_WRITE);
 306 }
 307
 308 /*
 309  * __mac_mount:
 310  *      Mount a file system taking into account MAC label behavior.
 311  *      See mount(2) man page for more information
 312  *
 313  * Parameters:    p                        Process requesting the mount
 314  *                uap                      User argument descriptor (see below)
 315  *                retval                   (ignored)
 316  *
 317  * Indirect:      uap->type                Filesystem type
 318  *                uap->path                Path to mount
 319  *                uap->data                Mount arguments
 320  *                uap->mac_p               MAC info
 321  *                uap->flags               Mount flags
 322  *
 323  *
 324  * Returns:        0                       Success
 325  *                !0                       Not success
 326  */
 327 boolean_t root_fs_upgrade_try = FALSE;
 328
 329 int
 330 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
 331 {
 332         vnode_t pvp, vp;
 333         vfs_context_t ctx = vfs_context_current();
 334         char fstypename[MFSNAMELEN];
 335         struct nameidata nd;
 336         size_t dummy=0;
 337         char *labelstr = NULL;
 338         int flags = uap->flags;
 339         int error;
 340         boolean_t is_64bit = IS_64BIT_PROCESS(p);
 341
 342         /*
 343          * Get the fs type name from user space
 344          */
 345         error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
 346         if (error)
 347                 return (error);
 348
 349         /*
 350          * Get the vnode to be covered
 351          */
 352         NDINIT(&nd, LOOKUP, OP_MOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT,
 353                UIO_USERSPACE, uap->path, ctx);
 354         error = namei(&nd);
 355         if (error)
 356                 return (error);
 357         vp = nd.ni_vp;
 358         pvp = nd.ni_dvp;
 359
 360 #ifdef CONFIG_IMGSRC_ACCESS
 361         /* Mounting image source cannot be batched with other operations */
 362         if (flags == MNT_IMGSRC_BY_INDEX) {
 363                 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
 364                                                   ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
 365                 goto out;
 366         }
 367 #endif /* CONFIG_IMGSRC_ACCESS */
 368
 369 #if CONFIG_MACF
 370         /*
 371          * Get the label string (if any) from user space
 372          */
 373         if (uap->mac_p != USER_ADDR_NULL) {
 374                 struct user_mac mac;
 375                 size_t ulen = 0;
 376
 377                 if (is_64bit) {
 378                         struct user64_mac mac64;
 379                         error = copyin(uap->mac_p, &mac64, sizeof(mac64));
 380                         mac.m_buflen = mac64.m_buflen;
 381                         mac.m_string = mac64.m_string;
 382                 } else {
 383                         struct user32_mac mac32;
 384                         error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 385                         mac.m_buflen = mac32.m_buflen;
 386                         mac.m_string = mac32.m_string;
 387                 }
 388                 if (error)
 389                         goto out;
 390                 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
 391                     (mac.m_buflen < 2)) {
 392                         error = EINVAL;
 393                         goto out;
 394                 }
 395                 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
 396                 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
 397                 if (error) {
 398                         goto out;
 399                 }
 400                 AUDIT_ARG(mac_string, labelstr);
 401         }
 402 #endif /* CONFIG_MACF */
 403
 404         AUDIT_ARG(fflags, flags);
 405
 406         if ((vp->v_flag & VROOT) &&
 407                 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
 408                         flags |= MNT_UPDATE;
 409         /*
 410          * See 7392553 for more details on why this check exists.
 411          * Suffice to say: If this check is ON and something tries
 412          * to mount the rootFS RW, we'll turn off the codesign
 413          * bitmap optimization.
 414          */
 415 #if CHECK_CS_VALIDATION_BITMAP
 416                 if ( !(flags & MNT_RDONLY) ) {
 417                         root_fs_upgrade_try = TRUE;
 418                 }
 419 #endif
 420         }
 421
 422         error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
 423                              labelstr, FALSE, ctx);
 424 out:
 425 #if CONFIG_MACF
 426         if (labelstr)
 427                 FREE(labelstr, M_MACTEMP);
 428 #endif /* CONFIG_MACF */
 429
 430         vnode_put(vp);
 431         vnode_put(pvp);
 432         nameidone(&nd);
 433
 434         return (error);
 435 }
 436
/*
 * common mount implementation (final stage of mounting)
 *
 * Arguments:
 *  fstypename      file system type (i.e. its vfs name)
 *  pvp             parent of covered vnode
 *  vp              covered vnode
 *  cnp             component name (i.e. path) of covered vnode
 *  fsmountargs     file system specific data
 *  flags           generic mount flags
 *  internal_flags  kernel-internal mount flags (KERNEL_MOUNT_*)
 *  labelstr        optional MAC label
 *  kernelmount     TRUE for mounts initiated from inside the kernel
 *  ctx             caller's context
 */
 451 static int
 452 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
 453              struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
 454              char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
 455 {
 456         struct vnode *devvp = NULLVP;
 457         struct vnode *device_vnode = NULLVP;
 458 #if CONFIG_MACF
 459         struct vnode *rvp;
 460 #endif
 461         struct mount *mp;
 462         struct vfstable *vfsp = (struct vfstable *)0;
 463         struct proc *p = vfs_context_proc(ctx);
 464         int error, flag = 0;
 465         user_addr_t devpath = USER_ADDR_NULL;
 466         int ronly = 0;
 467         int mntalloc = 0;
 468         boolean_t vfsp_ref = FALSE;
 469         boolean_t is_rwlock_locked = FALSE;
 470         boolean_t did_rele = FALSE;
 471         boolean_t have_usecount = FALSE;
 472
 473         /*
 474          * Process an update for an existing mount
 475          */
 476         if (flags & MNT_UPDATE) {
 477                 if ((vp->v_flag & VROOT) == 0) {
 478                         error = EINVAL;
 479                         goto out1;
 480                 }
 481                 mp = vp->v_mount;
 482
 483                 /* unmount in progress return error */
 484                 mount_lock_spin(mp);
 485                 if (mp->mnt_lflag & MNT_LUNMOUNT) {
 486                         mount_unlock(mp);
 487                         error = EBUSY;
 488                         goto out1;
 489                 }
 490                 mount_unlock(mp);
 491                 lck_rw_lock_exclusive(&mp->mnt_rwlock);
 492                 is_rwlock_locked = TRUE;
 493                 /*
 494                  * We only allow the filesystem to be reloaded if it
 495                  * is currently mounted read-only.
 496                  */
 497                 if ((flags & MNT_RELOAD) &&
 498                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
 499                         error = ENOTSUP;
 500                         goto out1;
 501                 }
 502
 503 #ifdef CONFIG_IMGSRC_ACCESS
 504                 /* Can't downgrade the backer of the root FS */
 505                 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
 506                         (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
 507                         error = ENOTSUP;
 508                         goto out1;
 509                 }
 510 #endif /* CONFIG_IMGSRC_ACCESS */
 511
 512                 /*
 513                  * Only root, or the user that did the original mount is
 514                  * permitted to update it.
 515                  */
 516                 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 517                     (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
 518                         goto out1;
 519                 }
 520 #if CONFIG_MACF
 521                 error = mac_mount_check_remount(ctx, mp);
 522                 if (error != 0) {
 523                         goto out1;
 524                 }
 525 #endif
 526                 /*
 527                  * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
 528                  * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
 529                  */
 530                 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
 531                         flags |= MNT_NOSUID | MNT_NODEV;
 532                         if (mp->mnt_flag & MNT_NOEXEC)
 533                                 flags |= MNT_NOEXEC;
 534                 }
 535                 flag = mp->mnt_flag;
 536
 537                 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
 538
 539                 vfsp = mp->mnt_vtable;
 540                 goto update;
 541         }
 542         /*
 543          * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
 544          * MNT_NOEXEC if mount point is already MNT_NOEXEC.
 545          */
 546         if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
 547                 flags |= MNT_NOSUID | MNT_NODEV;
 548                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
 549                         flags |= MNT_NOEXEC;
 550         }
 551
 552         /* XXXAUDIT: Should we capture the type on the error path as well? */
 553         AUDIT_ARG(text, fstypename);
 554         mount_list_lock();
 555         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 556                 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
 557                         vfsp->vfc_refcount++;
 558                         vfsp_ref = TRUE;
 559                         break;
 560                 }
 561         mount_list_unlock();
 562         if (vfsp == NULL) {
 563                 error = ENODEV;
 564                 goto out1;
 565         }
 566
 567         /*
 568          * VFC_VFSLOCALARGS is not currently supported for kernel mounts
 569          */
 570         if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
 571                 error = EINVAL;  /* unsupported request */
 572                 goto out1;
 573         }
 574
 575         error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
 576         if (error != 0) {
 577                 goto out1;
 578         }
 579
 580         /*
 581          * Allocate and initialize the filesystem (mount_t)
 582          */
 583         MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
 584                 M_MOUNT, M_WAITOK);
 585         bzero((char *)mp, (u_int32_t)sizeof(struct mount));
 586         mntalloc = 1;
 587
 588         /* Initialize the default IO constraints */
 589         mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 590         mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 591         mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
 592         mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
 593         mp->mnt_devblocksize = DEV_BSIZE;
 594         mp->mnt_alignmentmask = PAGE_MASK;
 595         mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
 596         mp->mnt_ioscale = 1;
 597         mp->mnt_ioflags = 0;
 598         mp->mnt_realrootvp = NULLVP;
 599         mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
 600
 601         TAILQ_INIT(&mp->mnt_vnodelist);
 602         TAILQ_INIT(&mp->mnt_workerqueue);
 603         TAILQ_INIT(&mp->mnt_newvnodes);
 604         mount_lock_init(mp);
 605         lck_rw_lock_exclusive(&mp->mnt_rwlock);
 606         is_rwlock_locked = TRUE;
 607         mp->mnt_op = vfsp->vfc_vfsops;
 608         mp->mnt_vtable = vfsp;
 609         //mp->mnt_stat.f_type = vfsp->vfc_typenum;
 610         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 611         strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
 612         strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
 613         mp->mnt_vnodecovered = vp;
 614         mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
 615         mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
 616         mp->mnt_devbsdunit = 0;
 617
 618         /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
 619         vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
 620
 621 #if NFSCLIENT
 622         if (kernelmount)
 623                 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
 624         if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
 625                 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
 626 #endif /* NFSCLIENT */
 627
 628 update:
 629         /*
 630          * Set the mount level flags.
 631          */
 632         if (flags & MNT_RDONLY)
 633                 mp->mnt_flag |= MNT_RDONLY;
 634         else if (mp->mnt_flag & MNT_RDONLY) {
 635                 // disallow read/write upgrades of file systems that
 636                 // had the TYPENAME_OVERRIDE feature set.
 637                 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
 638                         error = EPERM;
 639                         goto out1;
 640                 }
 641                 mp->mnt_kern_flag |= MNTK_WANTRDWR;
 642         }
 643         mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 644                           MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 645                           MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
 646                           MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
 647                           MNT_QUARANTINE | MNT_CPROTECT);
 648         mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 649                                  MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 650                                  MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
 651                                  MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
 652                                  MNT_QUARANTINE | MNT_CPROTECT);
 653
 654 #if CONFIG_MACF
 655         if (flags & MNT_MULTILABEL) {
 656                 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
 657                         error = EINVAL;
 658                         goto out1;
 659                 }
 660                 mp->mnt_flag |= MNT_MULTILABEL;
 661         }
 662 #endif
 663         /*
 664          * Process device path for local file systems if requested
 665          */
 666         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
 667                 if (vfs_context_is64bit(ctx)) {
 668                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
 669                                 goto out1;
 670                         fsmountargs += sizeof(devpath);
 671                 } else {
 672                         user32_addr_t tmp;
 673                         if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
 674                                 goto out1;
 675                         /* munge into LP64 addr */
 676                         devpath = CAST_USER_ADDR_T(tmp);
 677                         fsmountargs += sizeof(tmp);
 678                 }
 679
 680                 /* Lookup device and authorize access to it */
 681                 if ((devpath)) {
 682                         struct nameidata nd;
 683
 684                         NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
 685                         if ( (error = namei(&nd)) )
 686                                 goto out1;
 687
 688                         strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
 689                         devvp = nd.ni_vp;
 690
 691                         nameidone(&nd);
 692
 693                         if (devvp->v_type != VBLK) {
 694                                 error = ENOTBLK;
 695                                 goto out2;
 696                         }
 697                         if (major(devvp->v_rdev) >= nblkdev) {
 698                                 error = ENXIO;
 699                                 goto out2;
 700                         }
 701                         /*
 702                         * If mount by non-root, then verify that user has necessary
 703                         * permissions on the device.
 704                         */
 705                         if (suser(vfs_context_ucred(ctx), NULL) != 0) {
 706                                 mode_t accessmode = KAUTH_VNODE_READ_DATA;
 707
 708                                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 709                                         accessmode |= KAUTH_VNODE_WRITE_DATA;
 710                                 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
 711                                         goto out2;
 712                         }
 713                 }
 714                 /* On first mount, preflight and open device */
 715                 if (devpath && ((flags & MNT_UPDATE) == 0)) {
 716                         if ( (error = vnode_ref(devvp)) )
 717                                 goto out2;
 718                         /*
 719                         * Disallow multiple mounts of the same device.
 720                         * Disallow mounting of a device that is currently in use
 721                         * (except for root, which might share swap device for miniroot).
 722                         * Flush out any old buffers remaining from a previous use.
 723                         */
 724                         if ( (error = vfs_mountedon(devvp)) )
 725                                 goto out3;
 726
 727                         if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
 728                                 error = EBUSY;
 729                                 goto out3;
 730                         }
 731                         if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
 732                                 error = ENOTBLK;
 733                                 goto out3;
 734                         }
 735                         if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
 736                                 goto out3;
 737
 738                         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 739 #if CONFIG_MACF
 740                         error = mac_vnode_check_open(ctx,
 741                             devvp,
 742                             ronly ? FREAD : FREAD|FWRITE);
 743                         if (error)
 744                                 goto out3;
 745 #endif /* MAC */
 746                         if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
 747                                 goto out3;
 748
 749                         mp->mnt_devvp = devvp;
 750                         device_vnode = devvp;
 751
 752                 } else if ((mp->mnt_flag & MNT_RDONLY) &&
 753                            (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
 754                            (device_vnode = mp->mnt_devvp)) {
 755                         dev_t dev;
 756                         int maj;
 757                         /*
 758                          * If upgrade to read-write by non-root, then verify
 759                          * that user has necessary permissions on the device.
 760                          */
 761                         vnode_getalways(device_vnode);
 762
 763                         if (suser(vfs_context_ucred(ctx), NULL) &&
 764                             (error = vnode_authorize(device_vnode, NULL,
 765                              KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
 766                              ctx)) != 0) {
 767                                 vnode_put(device_vnode);
 768                                 goto out2;
 769                         }
 770
 771                         /* Tell the device that we're upgrading */
 772                         dev = (dev_t)device_vnode->v_rdev;
 773                         maj = major(dev);
 774
 775                         if ((u_int)maj >= (u_int)nblkdev)
 776                                 panic("Volume mounted on a device with invalid major number.");
 777
 778                         error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
 779                         vnode_put(device_vnode);
 780                         device_vnode = NULLVP;
 781                         if (error != 0) {
 782                                 goto out2;
 783                         }
 784                 }
 785         }
 786 #if CONFIG_MACF
 787         if ((flags & MNT_UPDATE) == 0) {
 788                 mac_mount_label_init(mp);
 789                 mac_mount_label_associate(ctx, mp);
 790         }
 791         if (labelstr) {
 792                 if ((flags & MNT_UPDATE) != 0) {
 793                         error = mac_mount_check_label_update(ctx, mp);
 794                         if (error != 0)
 795                                 goto out3;
 796                 }
 797         }
 798 #endif
 799         /*
 800          * Mount the filesystem.
 801          */
 802         error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
 803
 804         if (flags & MNT_UPDATE) {
 805                 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 806                         mp->mnt_flag &= ~MNT_RDONLY;
 807                 mp->mnt_flag &=~
 808                     (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
 809                 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 810                 if (error)
 811                         mp->mnt_flag = flag;  /* restore flag value */
 812                 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
 813                 lck_rw_done(&mp->mnt_rwlock);
 814                 is_rwlock_locked = FALSE;
 815                 if (!error)
 816                         enablequotas(mp, ctx);
 817                 goto exit;
 818         }
 819
 820         /*
 821          * Put the new filesystem on the mount list after root.
 822          */
 823         if (error == 0) {
 824                 struct vfs_attr vfsattr;
 825 #if CONFIG_MACF
 826                 if (vfs_flags(mp) & MNT_MULTILABEL) {
 827                         error = VFS_ROOT(mp, &rvp, ctx);
 828                         if (error) {
 829                                 printf("%s() VFS_ROOT returned %d\n", __func__, error);
 830                                 goto out3;
 831                         }
 832                         error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
 833                         /*
 834                          * drop reference provided by VFS_ROOT
 835                          */
 836                         vnode_put(rvp);
 837
 838                         if (error)
 839                                 goto out3;
 840                 }
 841 #endif  /* MAC */
 842
 843                 vnode_lock_spin(vp);
 844                 CLR(vp->v_flag, VMOUNT);
 845                 vp->v_mountedhere = mp;
 846                 vnode_unlock(vp);
 847
 848                 /*
 849                  * taking the name_cache_lock exclusively will
 850                  * insure that everyone is out of the fast path who
 851                  * might be trying to use a now stale copy of
 852                  * vp->v_mountedhere->mnt_realrootvp
 853                  * bumping mount_generation causes the cached values
 854                  * to be invalidated
 855                  */
 856                 name_cache_lock();
 857                 mount_generation++;
 858                 name_cache_unlock();
 859
 860                 error = vnode_ref(vp);
 861                 if (error != 0) {
 862                         goto out4;
 863                 }
 864
 865                 have_usecount = TRUE;
 866
 867                 error = checkdirs(vp, ctx);
 868                 if (error != 0)  {
 869                         /* Unmount the filesystem as cdir/rdirs cannot be updated */
 870                         goto out4;
 871                 }
 872                 /*
 873                  * there is no cleanup code here so I have made it void
 874                  * we need to revisit this
 875                  */
 876                 (void)VFS_START(mp, 0, ctx);
 877
 878                 if (mount_list_add(mp) != 0) {
 879                         /*
 880                          * The system is shutting down trying to umount
 881                          * everything, so fail with a plausible errno.
 882                          */
 883                         error = EBUSY;
 884                         goto out4;
 885                 }
 886                 lck_rw_done(&mp->mnt_rwlock);
 887                 is_rwlock_locked = FALSE;
 888
 889                 /* Check if this mounted file system supports EAs or named streams. */
 890                 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
 891                 VFSATTR_INIT(&vfsattr);
 892                 VFSATTR_WANTED(&vfsattr, f_capabilities);
 893                 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
 894                     vfs_getattr(mp, &vfsattr, ctx) == 0 &&
 895                     VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
 896                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
 897                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
 898                                 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 899                         }
 900 #if NAMEDSTREAMS
 901                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
 902                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
 903                                 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
 904                         }
 905 #endif
 906                         /* Check if this file system supports path from id lookups. */
 907                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
 908                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
 909                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 910                         } else if (mp->mnt_flag & MNT_DOVOLFS) {
 911                                 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
 912                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 913                         }
 914                 }
 915                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
 916                         mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 917                 }
 918                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
 919                         mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
 920                 }
 921                 /* increment the operations count */
 922                 OSAddAtomic(1, &vfs_nummntops);
 923                 enablequotas(mp, ctx);
 924
 925                 if (device_vnode) {
 926                         device_vnode->v_specflags |= SI_MOUNTEDON;
 927
 928                         /*
 929                          *   cache the IO attributes for the underlying physical media...
 930                          *   an error return indicates the underlying driver doesn't
 931                          *   support all the queries necessary... however, reasonable
 932                          *   defaults will have been set, so no reason to bail or care
 933                          */
 934                         vfs_init_io_attributes(device_vnode, mp);
 935                 }
 936
 937                 /* Now that mount is setup, notify the listeners */
 938                 vfs_notify_mount(pvp);
 939         } else {
 940                 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
 941                 if (mp->mnt_vnodelist.tqh_first != NULL) {
 942                         panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
 943                                         mp->mnt_vtable->vfc_name, error);
 944                 }
 945
 946                 vnode_lock_spin(vp);
 947                 CLR(vp->v_flag, VMOUNT);
 948                 vnode_unlock(vp);
 949                 mount_list_lock();
 950                 mp->mnt_vtable->vfc_refcount--;
 951                 mount_list_unlock();
 952
 953                 if (device_vnode ) {
 954                         vnode_rele(device_vnode);
 955                         VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
 956                 }
 957                 lck_rw_done(&mp->mnt_rwlock);
 958                 is_rwlock_locked = FALSE;
 959
 960                 /*
 961                  * if we get here, we have a mount structure that needs to be freed,
 962                  * but since the coveredvp hasn't yet been updated to point at it,
 963                  * no need to worry about other threads holding a crossref on this mp
 964                  * so it's ok to just free it
 965                  */
 966                 mount_lock_destroy(mp);
 967 #if CONFIG_MACF
 968                 mac_mount_label_destroy(mp);
 969 #endif
 970                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 971         }
 972 exit:
 973         /*
 974          * drop I/O count on the device vp if there was one
 975          */
 976         if (devpath && devvp)
 977                 vnode_put(devvp);
 978
 979         return(error);
 980
 981 /* Error condition exits */
 982 out4:
 983         (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
 984
 985         /*
 986          * If the mount has been placed on the covered vp,
 987          * it may have been discovered by now, so we have
 988          * to treat this just like an unmount
 989          */
 990         mount_lock_spin(mp);
 991         mp->mnt_lflag |= MNT_LDEAD;
 992         mount_unlock(mp);
 993
 994         if (device_vnode != NULLVP) {
 995                 vnode_rele(device_vnode);
 996                 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
 997                        ctx);
 998                 did_rele = TRUE;
 999         }
1000
1001         vnode_lock_spin(vp);
1002
1003         mp->mnt_crossref++;
1004         vp->v_mountedhere = (mount_t) 0;
1005
1006         vnode_unlock(vp);
1007
1008         if (have_usecount) {
1009                 vnode_rele(vp);
1010         }
1011 out3:
1012         if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1013                 vnode_rele(devvp);
1014 out2:
1015         if (devpath && devvp)
1016                 vnode_put(devvp);
1017 out1:
1018         /* Release mnt_rwlock only when it was taken */
1019         if (is_rwlock_locked == TRUE) {
1020                 lck_rw_done(&mp->mnt_rwlock);
1021         }
1022
1023         if (mntalloc) {
1024                 if (mp->mnt_crossref)
1025                         mount_dropcrossref(mp, vp, 0);
1026                 else {
1027                         mount_lock_destroy(mp);
1028 #if CONFIG_MACF
1029                         mac_mount_label_destroy(mp);
1030 #endif
1031                         FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1032                 }
1033         }
1034         if (vfsp_ref) {
1035                 mount_list_lock();
1036                 vfsp->vfc_refcount--;
1037                 mount_list_unlock();
1038         }
1039
1040         return(error);
1041 }
1042
1043 /*
1044  * Ready a vnode to be covered by a mount: optionally check ownership,
1045  * flush and invalidate its buffers, refuse non-directories and vnodes
      * that already carry a mount, run the MAC mount check when CONFIG_MACF
      * is built in, and finally set VMOUNT under the vnode lock.
      *
      * skip_auth bypasses only the ownership test.  Returns 0 on success,
      * otherwise EPERM, ENOTDIR, EBUSY, or the error reported by
      * VNOP_FSYNC(), buf_invalidateblks() or mac_mount_check_mount().
1046  */
1047 int
1048 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1049 {
1050         struct vnode_attr va;
1051         int error;
1052
1053         if (!skip_auth) {
1054                 /*
1055                  * A caller that is not root must own the directory
1056                  * being mounted on; a failed attribute lookup is
1057                  * reported as EPERM as well.
                      */
1058                 VATTR_INIT(&va);
1059                 VATTR_WANTED(&va, va_uid);
1060                 if (vnode_getattr(vp, &va, ctx) != 0)
1061                         return EPERM;
1062                 if (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1063                     !vfs_context_issuser(ctx))
1064                         return EPERM;
1066         }
1067
1068         error = VNOP_FSYNC(vp, MNT_WAIT, ctx);
1069         if (error != 0)
1070                 return error;
1071         error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
1072         if (error != 0)
1073                 return error;
1074
1075         /* Only a directory may be covered */
1076         if (vp->v_type != VDIR)
1077                 return ENOTDIR;
1078
1079         /* VMOUNT is set and a mount is already attached to this vnode */
1080         if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL))
1081                 return EBUSY;
1083
1084 #if CONFIG_MACF
1085         error = mac_mount_check_mount(ctx, vp, cnp, fsname);
1087         if (error != 0)
1088                 return error;
1089 #endif
1090
1091         vnode_lock_spin(vp);
1092         SET(vp->v_flag, VMOUNT);
1093         vnode_unlock(vp);
1094
1096         return 0;
1097 }
1098
1099 #if CONFIG_IMGSRC_ACCESS
1100
1101 #if DEBUG
     /* With DEBUG set, IMGSRC_DEBUG() forwards its arguments to printf(); otherwise it expands to an empty statement. */
1102 #define IMGSRC_DEBUG(args...) printf(args)
1103 #else
1104 #define IMGSRC_DEBUG(args...) do { } while(0)
1105 #endif
1106
     /*
      * Resolve the user-supplied device path, verify that it names the block
      * device backing mp, authorize the caller, and only then record the path
      * as the mount's "mounted from" name.
      *
      * mp      - mount whose backing device (mnt_devvp) is being checked
      * devpath - user-space address of the device path string
      * devvpp  - on success, receives the vnode found by the lookup
      * ctx     - context used for the lookup and the authorization
      *
      * Returns 0 on success; the vnode stored in *devvpp is not released
      * here, so the caller is expected to vnode_put() it.  On failure returns
      * ENOTBLK, ENXIO, or the error from namei(), vnode_getwithref() or
      * vnode_authorize(); the looked-up vnode is released before returning
      * and the mount's f_mntfromname is left unchanged.
      */
1107 static int
1108 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1109 {
1110         struct nameidata nd;
1111         vnode_t vp, realdevvp;
1112         mode_t accessmode;
1113         int error;
1114
1115         NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1116         if ( (error = namei(&nd)) ) {
1117                 IMGSRC_DEBUG("namei() failed with %d\n", error);
1118                 return error;
1119         }
1120
1121         vp = nd.ni_vp;
1122
1123         if (!vnode_isblk(vp)) {
1124                 IMGSRC_DEBUG("Not block device.\n");
1125                 error = ENOTBLK;
1126                 goto out;
1127         }
1128
1129         realdevvp = mp->mnt_devvp;
1130         if (realdevvp == NULLVP) {
1131                 IMGSRC_DEBUG("No device backs the mount.\n");
1132                 error = ENXIO;
1133                 goto out;
1134         }
1135
1136         error = vnode_getwithref(realdevvp);
1137         if (error != 0) {
1138                 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1139                 goto out;
1140         }
1141
             /* The path must name the same device number as the mount's own device */
1142         if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1143                 IMGSRC_DEBUG("Wrong dev_t.\n");
1144                 error = ENXIO;
1145                 goto out1;
1146         }
1147
1150         /*
1151          * If mount by non-root, then verify that user has necessary
1152          * permissions on the device.
1153          */
1154         if (!vfs_context_issuser(ctx)) {
1155                 accessmode = KAUTH_VNODE_READ_DATA;
1156                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1157                         accessmode |= KAUTH_VNODE_WRITE_DATA;
1158                 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1159                         IMGSRC_DEBUG("Access denied.\n");
1160                         goto out1;
1161                 }
1162         }
1163
             /*
              * Update the mount only after every check has passed, so that a
              * rejected request cannot change f_mntfromname.  The path buffer
              * is still valid here because nameidone() has not run yet.
              */
             strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
     
1164         *devvpp = vp;
1165
1166 out1:
1167         vnode_put(realdevvp);
1168 out:
1169         nameidone(&nd);
1170         if (error) {
1171                 vnode_put(vp);
1172         }
1173
1174         return error;
1175 }
1176
1177 /*
1178  * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1179  * and call checkdirs()
      *
      * Returns 0 on success, with a vnode_ref() reference held on vp.
      * If vnode_ref() or checkdirs() fails, that error is returned with no
      * such reference left held and with both vp->v_mountedhere and
      * mp->mnt_vnodecovered reset, so vp no longer points at mp.  VMOUNT is
      * not set again on failure.
1180  */
1181 static int
1182 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1183 {
1184         int error;
1185
1186         mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1187
1188         vnode_lock_spin(vp);
1189         CLR(vp->v_flag, VMOUNT);
1190         vp->v_mountedhere = mp;
1191         vnode_unlock(vp);
1192
1193         /*
1194          * taking the name_cache_lock exclusively will
1195          * ensure that everyone is out of the fast path who
1196          * might be trying to use a now stale copy of
1197          * vp->v_mountedhere->mnt_realrootvp
1198          * bumping mount_generation causes the cached values
1199          * to be invalidated
1200          */
1201         name_cache_lock();
1202         mount_generation++;
1203         name_cache_unlock();
1204
1205         error = vnode_ref(vp);
1206         if (error != 0) {
1207                 goto out;
1208         }
1209
1210         error = checkdirs(vp, ctx);
1211         if (error != 0)  {
1212                 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1213                 vnode_rele(vp);
1214                 goto out;
1215         }
1216
1217 out:
1218         if (error != 0) {
                     /*
                      * Take the mount back off the covered vnode.  Without
                      * this, a failure would reset mnt_vnodecovered while
                      * leaving vp->v_mountedhere still pointing at mp.
                      */
                     vnode_lock_spin(vp);
                     vp->v_mountedhere = (mount_t)NULL;
                     vnode_unlock(vp);
     
1219                 mp->mnt_vnodecovered = NULLVP;
1220         }
1221         return error;
1222 }
1223
     /*
      * Detach mp from the vnode it was placed on: drop one vnode_rele()
      * reference on vp, clear vp->v_mountedhere under the vnode lock, and
      * reset mp->mnt_vnodecovered.  The vnode's VMOUNT flag is not touched.
      */
1224 static void
1225 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1226 {
1227         vnode_rele(vp);
1228         vnode_lock_spin(vp);
1229         vp->v_mountedhere = (mount_t)NULL;
1230         vnode_unlock(vp);
1231
1232         mp->mnt_vnodecovered = NULLVP;
1233 }
1234
     /*
      * Start an update of an already-mounted filesystem.
      *
      * Fails with EBUSY, without taking mnt_rwlock, when MNT_LUNMOUNT is set
      * on the mount.  Otherwise takes mp->mnt_rwlock exclusively and checks:
      *   - MNT_RELOAD is requested only for a read-only mount (else ENOTSUP);
      *   - the caller is the mount's owner or root (else EPERM);
      *   - mac_mount_check_remount() allows it, when CONFIG_MACF is built in.
      *
      * Returns 0 with mnt_rwlock still held exclusively; on any error after
      * the lock was taken it is dropped again before returning.
      */
1235 static int
1236 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1237 {
             /*
              * Must start at 0: without CONFIG_MACF nothing below assigns
              * it on the success path, yet it is tested at "out".
              */
1238         int error = 0;
1239
1240         /* unmount in progress return error */
1241         mount_lock_spin(mp);
1242         if (mp->mnt_lflag & MNT_LUNMOUNT) {
1243                 mount_unlock(mp);
1244                 return EBUSY;
1245         }
1246         mount_unlock(mp);
1247         lck_rw_lock_exclusive(&mp->mnt_rwlock);
1248
1249         /*
1250          * We only allow the filesystem to be reloaded if it
1251          * is currently mounted read-only.
1252          */
1253         if ((flags & MNT_RELOAD) &&
1254                         ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1255                 error = ENOTSUP;
1256                 goto out;
1257         }
1258
1259         /*
1260          * Only root, or the user that did the original mount is
1261          * permitted to update it.
1262          */
1263         if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1264                         (!vfs_context_issuser(ctx))) {
1265                 error = EPERM;
1266                 goto out;
1267         }
1268 #if CONFIG_MACF
1269         error = mac_mount_check_remount(ctx, mp);
1270         if (error != 0) {
1271                 goto out;
1272         }
1273 #endif
1274
1275 out:
             /* The lock stays held only when the update may proceed */
1276         if (error) {
1277                 lck_rw_done(&mp->mnt_rwlock);
1278         }
1279
1280         return error;
1281 }
1282
     /*
      * Finish an update on mp by releasing mp->mnt_rwlock.
      */
1283 static void
1284 mount_end_update(mount_t mp)
1285 {
1286         lck_rw_done(&mp->mnt_rwlock);
1287 }
1288
     /*
      * Fetch the entry of imgsrc_rootvnodes[] at the given nesting height.
      *
      * Returns EINVAL when height is not below MAX_IMAGEBOOT_NESTING, ENOENT
      * when the slot is empty or vnode_get() fails, and 0 with the vnode
      * stored in *rvpp after a successful vnode_get().
      */
1289 static int
1290 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1291 {
1292         vnode_t rootvp;
1293
1294         if (height >= MAX_IMAGEBOOT_NESTING)
1295                 return EINVAL;
1297
1298         rootvp = imgsrc_rootvnodes[height];
1299         if (rootvp == NULLVP || vnode_get(rootvp) != 0)
1300                 return ENOENT;
1301
1302         *rvpp = rootvp;
1303         return 0;
1305 }
1306
     /*
      * Move the filesystem recorded in imgsrc_rootvnodes[] onto the
      * directory vp.
      *
      * pvp         - passed to vfs_notify_mount() once the move has succeeded
      * vp          - directory vnode that will be covered by the moved mount
      * cnp         - component name for vp; its cn_pnbuf becomes f_mntonname
      * fsname      - filesystem type name, must match the mount's vfc_name
      * ctx         - caller's context; must be root
      * is64bit     - selects the 64-bit or 32-bit layout of the user arguments
      * fsmountargs - user address of the arguments
      * by_index    - TRUE: arguments are a mnt_imgsrc_args structure carrying
      *               height, flags and device path; FALSE: arguments are just
      *               a device path pointer and height 0 is assumed
      *
      * Returns 0 on success.  Errors: EINVAL (no imageboot, nonzero flags,
      * bad height or wrong fsname), EPERM (not root), EBUSY (already moved or
      * mount list shutting down), or whatever copyin(), the update/covered-vp
      * preparation, device validation or placement steps report.  On any
      * failure the covered vnode and the mount are restored as described at
      * the out* labels.
      */
1307 static int
1308 relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1309                 const char *fsname, vfs_context_t ctx,
1310                 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1311 {
1312         int error;
1313         mount_t mp;
1314         boolean_t placed = FALSE;
1315         vnode_t devvp = NULLVP;
1316         struct vfstable *vfsp;
1317         user_addr_t devpath;
1318         char *old_mntonname;
1319         vnode_t rvp;
1320         uint32_t height;
1321         uint32_t flags;
1322
1323         /* If we didn't imageboot, nothing to move */
1324         if (imgsrc_rootvnodes[0] == NULLVP) {
1325                 return EINVAL;
1326         }
1327
1328         /* Only root can do this */
1329         if (!vfs_context_issuser(ctx)) {
1330                 return EPERM;
1331         }
1332
1333         IMGSRC_DEBUG("looking for root vnode.\n");
1334
1335         /*
1336          * Get root vnode of filesystem we're moving.
1337          */
1338         if (by_index) {
1339                 if (is64bit) {
1340                         struct user64_mnt_imgsrc_args mia64;
1341                         error = copyin(fsmountargs, &mia64, sizeof(mia64));
1342                         if (error != 0) {
1343                                 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1344                                 return error;
1345                         }
1346
1347                         height = mia64.mi_height;
1348                         flags = mia64.mi_flags;
1349                         devpath = mia64.mi_devpath;
1350                 } else {
1351                         struct user32_mnt_imgsrc_args mia32;
1352                         error = copyin(fsmountargs, &mia32, sizeof(mia32));
1353                         if (error != 0) {
1354                                 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1355                                 return error;
1356                         }
1357
1358                         height = mia32.mi_height;
1359                         flags = mia32.mi_flags;
1360                         devpath = mia32.mi_devpath;
1361                 }
1362         } else {
1363                 /*
1364                  * For binary compatibility--assumes one level of nesting.
1365                  */
1366                 if (is64bit) {
1367                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1368                                 return error;
1369                 } else {
1370                         user32_addr_t tmp;
1371                         if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1372                                 return error;
1373
1374                         /* munge into LP64 addr */
1375                         devpath = CAST_USER_ADDR_T(tmp);
1376                 }
1377
1378                 height = 0;
1379                 flags = 0;
1380         }
1381
1382         if (flags != 0) {
1383                 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1384                 return EINVAL;
1385         }
1386
1387         error = get_imgsrc_rootvnode(height, &rvp);
1388         if (error != 0) {
1389                 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1390                 return error;
1391         }
1392
1393         IMGSRC_DEBUG("got root vnode.\n");
1394
             /* Scratch copy of the current mount-on name, used to roll back at out3 */
1395         MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1396
1397         /* Can only move once */
1398         mp = vnode_mount(rvp);
1399         if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1400                 IMGSRC_DEBUG("Already moved.\n");
1401                 error = EBUSY;
1402                 goto out0;
1403         }
1404
1405         IMGSRC_DEBUG("Starting updated.\n");
1406
1407         /* Get exclusive rwlock on mount, authorize update on mp */
1408         error = mount_begin_update(mp , ctx, 0);
1409         if (error != 0) {
1410                 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
1411                 goto out0;
1412         }
1413
1414         /*
1415          * It can only be moved once.  Flag is set under the rwlock,
1416          * so we're now safe to proceed.
1417          */
1418         if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1419                 IMGSRC_DEBUG("Already moved [2]\n");
                     /* error is still 0 from mount_begin_update(); report the refusal */
                     error = EBUSY;
1420                 goto out1;
1421         }
1422
1423
1424         IMGSRC_DEBUG("Preparing coveredvp.\n");
1425
1426         /* Mark covered vnode as mount in progress, authorize placing mount on top */
1427         error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1428         if (error != 0) {
1429                 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1430                 goto out1;
1431         }
1432
1433         IMGSRC_DEBUG("Covered vp OK.\n");
1434
1435         /* Sanity check the name caller has provided */
1436         vfsp = mp->mnt_vtable;
1437         if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1438                 IMGSRC_DEBUG("Wrong fs name.\n");
1439                 error = EINVAL;
1440                 goto out2;
1441         }
1442
1443         /* Check the device vnode and update mount-from name, for local filesystems */
1444         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1445                 IMGSRC_DEBUG("Local, doing device validation.\n");
1446
1447                 if (devpath != USER_ADDR_NULL) {
1448                         error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1449                         if (error) {
1450                                 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1451                                 goto out2;
1452                         }
1453
                             /* The device vnode was only needed for validation */
1454                         vnode_put(devvp);
1455                 }
1456         }
1457
1458         /*
1459          * Place mp on top of vnode, ref the vnode,  call checkdirs(),
1460          * and increment the name cache's mount generation
1461          */
1462
1463         IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1464         error = place_mount_and_checkdirs(mp, vp, ctx);
1465         if (error != 0) {
1466                 goto out2;
1467         }
1468
1469         placed = TRUE;
1470
             /* strlcpy() always NUL-terminates, unlike strncpy() on a full-length path */
1471         strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1472         strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1473
1474         /* Forbid future moves */
1475         mount_lock(mp);
1476         mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1477         mount_unlock(mp);
1478
1479         /* Finally, add to mount list, completely ready to go */
1480         if (mount_list_add(mp) != 0) {
1481                 /*
1482                  * The system is shutting down trying to umount
1483                  * everything, so fail with a plausible errno.
1484                  */
1485                 error = EBUSY;
1486                 goto out3;
1487         }
1488
1489         mount_end_update(mp);
1490         vnode_put(rvp);
1491         FREE(old_mntonname, M_TEMP);
1492
1493         vfs_notify_mount(pvp);
1494
1495         return 0;
1496 out3:
             /* Put the previous mount-on name back and allow a later move again */
1497         strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1498
1499         mount_lock(mp);
1500         mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1501         mount_unlock(mp);
1502
1503 out2:
1504         /*
1505          * Placing the mp on the vnode clears VMOUNT,
1506          * so cleanup is different after that point
1507          */
1508         if (placed) {
1509                 /* Rele the vp, clear VMOUNT and v_mountedhere */
1510                 undo_place_on_covered_vp(mp, vp);
1511         } else {
1512                 vnode_lock_spin(vp);
1513                 CLR(vp->v_flag, VMOUNT);
1514                 vnode_unlock(vp);
1515         }
1516 out1:
1517         mount_end_update(mp);
1518
1519 out0:
1520         vnode_put(rvp);
1521         FREE(old_mntonname, M_TEMP);
1522         return error;
1523 }
1524
1525 #endif /* CONFIG_IMGSRC_ACCESS */
1526
     /*
      * Switch on disk quotas for an "hfs" mount; any other filesystem type
      * returns immediately.  For every quota type, the quota "ops" file under
      * the mount point is looked up and, if it exists, VFS_QUOTACTL() is
      * asked to turn quotas on using the matching quota file path.  All
      * failures are ignored.
      */
1527 void
1528 enablequotas(struct mount *mp, vfs_context_t ctx)
1529 {
1530         const char *ext[] = INITQFNAMES;
1531         const char *opsname = QUOTAOPSNAME;
1532         const char *quotaname = QUOTAFILENAME;
1533         char path[MAXPATHLEN];
1534         struct nameidata nd;
1535         int qtype;
1536
1537         /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1538         if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0)
1539                 return;
1540
1541         /*
1542          * Errors are deliberately dropped below: quota setup must not
1543          * interfere with the mount itself.
1544          */
1545         for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
1546                 snprintf(path, sizeof(path), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, opsname, ext[qtype]);
1547                 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1548                        CAST_USER_ADDR_T(path), ctx);
1549                 if (namei(&nd) == 0) {
1550                         /* the trigger file exists; only its presence matters */
1551                         vnode_put(nd.ni_vp);
1552                         nameidone(&nd);
1553                         snprintf(path, sizeof(path),  "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, quotaname, ext[qtype]);
1554
1555                         (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, qtype), 0, path, ctx);
                     }
1556         }
1558 }
1559
1560
     /*
      * Per-process callback: if process p has olddp as its current or root
      * directory, switch that directory to newdp.
      *
      * arg is a struct cdirargs carrying olddp and newdp.  For each
      * directory that matches, a vnode_ref() is taken on newdp and a
      * vnode_rele() is done on the vnode currently stored in the filedesc.
      * Always returns PROC_RETURNED.
      */
1561 static int
1562 checkdirs_callback(proc_t p, void * arg)
1563 {
1564         struct cdirargs * cdrp = (struct cdirargs * )arg;
1565         vnode_t olddp = cdrp->olddp;
1566         vnode_t newdp = cdrp->newdp;
1567         struct filedesc *fdp;
1568         vnode_t tvp;
1569         vnode_t fdp_cvp;
1570         vnode_t fdp_rvp;
1571         int cdir_changed = 0;
1572         int rdir_changed = 0;
1573
1574         /*
1575          * XXX Also needs to iterate each thread in the process to see if it
1576          * XXX is using a per-thread current working directory, and, if so,
1577          * XXX update that as well.
1578          */
1579
             /* Snapshot both directory vnodes while holding the fd lock */
1580         proc_fdlock(p);
1581         fdp = p->p_fd;
1582         if (fdp == (struct filedesc *)0) {
1583                 proc_fdunlock(p);
1584                 return(PROC_RETURNED);
1585         }
1586         fdp_cvp = fdp->fd_cdir;
1587         fdp_rvp = fdp->fd_rdir;
1588         proc_fdunlock(p);
1589
             /*
              * NOTE(review): from here until the lock is retaken, fdp->fd_cdir
              * and fdp->fd_rdir are read again without the fd lock, and the
              * vnode_ref() return values are not checked -- confirm that this
              * is safe against the process changing directories concurrently.
              */
1590         if (fdp_cvp == olddp) {
1591                 vnode_ref(newdp);
1592                 tvp = fdp->fd_cdir;
1593                 fdp_cvp = newdp;
1594                 cdir_changed = 1;
1595                 vnode_rele(tvp);
1596         }
1597         if (fdp_rvp == olddp) {
1598                 vnode_ref(newdp);
1599                 tvp = fdp->fd_rdir;
1600                 fdp_rvp = newdp;
1601                 rdir_changed = 1;
1602                 vnode_rele(tvp);
1603         }
             /* Store both values back under the lock; an unchanged one is rewritten from the snapshot */
1604         if (cdir_changed || rdir_changed) {
1605                 proc_fdlock(p);
1606                 fdp->fd_cdir = fdp_cvp;
1607                 fdp->fd_rdir = fdp_rvp;
1608                 proc_fdunlock(p);
1609         }
1610         return(PROC_RETURNED);
1611 }
1612
1613
1614
1615 /*
1616  * Scan all active processes to see if any of them have a current
1617  * or root directory onto which the new filesystem has just been
1618  * mounted. If so, replace them with the new mount point.
1619  */
1620 static int
1621 checkdirs(vnode_t olddp, vfs_context_t ctx)
1622 {
1623         vnode_t newdp;
1624         vnode_t tvp;
1625         int err;
1626         struct cdirargs cdr;
1627         struct uthread * uth = get_bsdthread_info(current_thread());
1628
1629         if (olddp->v_usecount == 1)
1630                 return(0);
1631         if (uth != (struct uthread *)0)
1632                 uth->uu_notrigger = 1;
1633         err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1634         if (uth != (struct uthread *)0)
1635                 uth->uu_notrigger = 0;
1636
1637         if (err != 0) {
1638 #if DIAGNOSTIC
1639                 panic("mount: lost mount: error %d", err);
1640 #endif
1641                 return(err);
1642         }
1643
1644         cdr.olddp = olddp;
1645         cdr.newdp = newdp;
1646         /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1647         proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1648
1649         if (rootvnode == olddp) {
1650                 vnode_ref(newdp);
1651                 tvp = rootvnode;
1652                 rootvnode = newdp;
1653                 vnode_rele(tvp);
1654         }
1655
1656         vnode_put(newdp);
1657         return(0);
1658 }
1659
1660 /*
1661  * Unmount a file system.
1662  *
1663  * Note: unmount takes a path to the vnode mounted on as argument,
1664  * not special file (as before).
1665  */
1666 /* ARGSUSED */
1667 int
1668 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1669 {
1670         vnode_t vp;
1671         struct mount *mp;
1672         int error;
1673         struct nameidata nd;
1674         vfs_context_t ctx = vfs_context_current();
1675
1676         NDINIT(&nd, LOOKUP, OP_UNMOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1677                 UIO_USERSPACE, uap->path, ctx);
1678         error = namei(&nd);
1679         if (error)
1680                 return (error);
1681         vp = nd.ni_vp;
1682         mp = vp->v_mount;
1683         nameidone(&nd);
1684
1685 #if CONFIG_MACF
1686         error = mac_mount_check_umount(ctx, mp);
1687         if (error != 0) {
1688                 vnode_put(vp);
1689                 return (error);
1690         }
1691 #endif
1692         /*
1693          * Must be the root of the filesystem
1694          */
1695         if ((vp->v_flag & VROOT) == 0) {
1696                 vnode_put(vp);
1697                 return (EINVAL);
1698         }
1699         mount_ref(mp, 0);
1700         vnode_put(vp);
1701         /* safedounmount consumes the mount ref */
1702         return (safedounmount(mp, uap->flags, ctx));
1703 }
1704
1705 int
1706 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1707 {
1708         mount_t mp;
1709
1710         mp = mount_list_lookupby_fsid(fsid, 0, 1);
1711         if (mp == (mount_t)0) {
1712                 return(ENOENT);
1713         }
1714         mount_ref(mp, 0);
1715         mount_iterdrop(mp);
1716         /* safedounmount consumes the mount ref */
1717         return(safedounmount(mp, flags, ctx));
1718 }
1719
1720
1721 /*
1722  * The mount struct comes with a mount ref which will be consumed.
1723  * Do the actual file system unmount, prevent some common foot shooting.
1724  */
1725 int
1726 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1727 {
1728         int error;
1729         proc_t p = vfs_context_proc(ctx);
1730
1731         /*
1732          * Skip authorization if the mount is tagged as permissive and
1733          * this is not a forced-unmount attempt.
1734          */
1735         if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1736                 /*
1737                  * Only root, or the user that did the original mount is
1738                  * permitted to unmount this filesystem.
1739                  */
1740                 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1741                                 (error = suser(kauth_cred_get(), &p->p_acflag)))
1742                         goto out;
1743         }
1744         /*
1745          * Don't allow unmounting the root file system.
1746          */
1747         if (mp->mnt_flag & MNT_ROOTFS) {
1748                 error = EBUSY; /* the root is always busy */
1749                 goto out;
1750         }
1751
1752 #ifdef CONFIG_IMGSRC_ACCESS
1753         if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1754                 error = EBUSY;
1755                 goto out;
1756         }
1757 #endif /* CONFIG_IMGSRC_ACCESS */
1758
1759         return (dounmount(mp, flags, 1, ctx));
1760
1761 out:
1762         mount_drop(mp, 0);
1763         return(error);
1764 }
1765
1766 /*
1767  * Do the actual file system unmount.
      *
      * Parameters:  mp        mount to be unmounted
      *              flags     unmount flags; MNT_FORCE selects a forced
      *                        unmount, and the value is also passed on to
      *                        vfs_nested_trigger_unmounts() and VFS_UNMOUNT()
      *              withref   non-zero if the caller passed in a mount ref on
      *                        mp; it is released here with mount_drop()
      *              ctx       context handed to the VFS and VNOP calls and
      *                        to the trigger callback
      *
      * Returns:     0         Success
      *              EBUSY     another unmount of mp was already in progress
      *      VFS_SYNC:???      (non-forced unmount of a read-write mount)
      *      vflush:???        (non-forced unmount only)
      *      VFS_UNMOUNT:???
      *
      * Notes:       On the three failure paths that jump to "out",
      *              MNTK_UNMOUNT, MNT_LUNMOUNT and MNT_LFORCE are cleared
      *              again before returning.
      *              NOTE(review): MNTK_FRCUNMOUNT, set for MNT_FORCE, is not
      *              cleared on those paths - confirm that is intended.
1768  */
1769 int
1770 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1771 {
1772         vnode_t coveredvp = (vnode_t)0;
1773         int error;
1774         int needwakeup = 0;
1775         int forcedunmount = 0;
1776         int lflags = 0;
1777         struct vnode *devvp = NULLVP;
1778 #if CONFIG_TRIGGERS
1779         int did_vflush = 0;
1780 #endif /* CONFIG_TRIGGERS */
1781
1782         if (flags & MNT_FORCE)
1783                 forcedunmount = 1;
1784
1785         mount_lock(mp);
1786         /* XXX post jaguar fix LK_DRAIN - then clean this up */
1787         if ((flags & MNT_FORCE)) {
1788                 mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
1789                 mp->mnt_lflag |= MNT_LFORCE;
1790         }
             /*
              * Someone else is already unmounting mp: register as a waiter,
              * give back the caller's ref, sleep until woken and report EBUSY.
              * This path returns without calling mount_unlock(); presumably
              * PDROP makes msleep leave the mutex released - TODO confirm.
              */
1791         if (mp->mnt_lflag & MNT_LUNMOUNT) {
1792                 mp->mnt_lflag |= MNT_LWAIT;
1793                 if(withref != 0)
1794                         mount_drop(mp, 1);
1795                 msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
1796                 /*
1797                  * The prior unmount attempt has probably succeeded.
1798                  * Do not dereference mp here - returning EBUSY is safest.
1799                  */
1800                 return (EBUSY);
1801         }
1802         mp->mnt_kern_flag |= MNTK_UNMOUNT;
1803         mp->mnt_lflag |= MNT_LUNMOUNT;
1804         mp->mnt_flag &=~ MNT_ASYNC;
1805         /*
1806          * anyone currently in the fast path that
1807          * trips over the cached rootvp will be
1808          * dumped out and forced into the slow path
1809          * to regenerate a new cached value
1810          */
1811         mp->mnt_realrootvp = NULLVP;
1812         mount_unlock(mp);
1813
1814         /*
1815          * taking the name_cache_lock exclusively will
1816          * insure that everyone is out of the fast path who
1817          * might be trying to use a now stale copy of
1818          * vp->v_mountedhere->mnt_realrootvp
1819          * bumping mount_generation causes the cached values
1820          * to be invalidated
1821          */
1822         name_cache_lock();
1823         mount_generation++;
1824         name_cache_unlock();
1825
1826
             /*
              * mnt_rwlock is held exclusively from here to the lck_rw_done()
              * after "out", except for the window around mount_list_remove().
              * The caller's mount ref is released once the rwlock is held.
              */
1827         lck_rw_lock_exclusive(&mp->mnt_rwlock);
1828         if (withref != 0)
1829                 mount_drop(mp, 0);
1830 #if CONFIG_FSE
1831         fsevent_unmount(mp);  /* has to come first! */
1832 #endif
1833         error = 0;
             /* a forced unmount skips both ubc_umount() and the VFS_SYNC() pass */
1834         if (forcedunmount == 0) {
1835                 ubc_umount(mp); /* release cached vnodes */
1836                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1837                         error = VFS_SYNC(mp, MNT_WAIT, ctx);
1838                         if (error) {
                                     /* "out" expects the mount lock to be held */
1839                                 mount_lock(mp);
1840                                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1841                                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1842                                 mp->mnt_lflag &= ~MNT_LFORCE;
1843                                 goto out;
1844                         }
1845                 }
1846         }
1847
1848 #if CONFIG_TRIGGERS
             /*
              * NOTE(review): did_vflush is set before vflush() itself runs.
              * Its only use is to select VTC_REPLACE in the trigger callback
              * at "out" when a later step fails.
              */
1849         vfs_nested_trigger_unmounts(mp, flags, ctx);
1850         did_vflush = 1;
1851 #endif
1852         if (forcedunmount)
1853                 lflags |= FORCECLOSE;
1854         error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
             /* a vflush() error aborts only a non-forced unmount */
1855         if ((forcedunmount == 0) && error) {
1856                 mount_lock(mp);
1857                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1858                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1859                 mp->mnt_lflag &= ~MNT_LFORCE;
1860                 goto out;
1861         }
1862
1863         /* make sure there are no one in the mount iterations or lookup */
1864         mount_iterdrain(mp);
1865
1866         error = VFS_UNMOUNT(mp, flags, ctx);
1867         if (error) {
                     /* undo mount_iterdrain() above before backing out */
1868                 mount_iterreset(mp);
1869                 mount_lock(mp);
1870                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1871                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1872                 mp->mnt_lflag &= ~MNT_LFORCE;
1873                 goto out;
1874         }
1875
1876         /* increment the operations count */
             /* error is always 0 here: a non-zero value took the goto above */
1877         if (!error)
1878                 OSAddAtomic(1, &vfs_nummntops);
1879
1880         if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1881                 /* hold an io reference and drop the usecount before close */
1882                 devvp = mp->mnt_devvp;
1883                 vnode_getalways(devvp);
1884                 vnode_rele(devvp);
1885                 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1886                        ctx);
1887                 vnode_clearmountedon(devvp);
1888                 vnode_put(devvp);
1889         }
             /* the rwlock is released around mount_list_remove() and then retaken */
1890         lck_rw_done(&mp->mnt_rwlock);
1891         mount_list_remove(mp);
1892         lck_rw_lock_exclusive(&mp->mnt_rwlock);
1893
1894         /* mark the mount point hook in the vp but not drop the ref yet */
1895         if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1896                 vnode_getwithref(coveredvp);
1897                 vnode_lock_spin(coveredvp);
1898
                     /* this crossref is dropped by mount_dropcrossref() near the end */
1899                 mp->mnt_crossref++;
1900                 coveredvp->v_mountedhere = (struct mount *)0;
1901
1902                 vnode_unlock(coveredvp);
1903                 vnode_put(coveredvp);
1904         }
1905
1906         mount_list_lock();
1907         mp->mnt_vtable->vfc_refcount--;
1908         mount_list_unlock();
1909
1910         cache_purgevfs(mp);     /* remove cache entries for this file sys */
1911         vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1912         mount_lock(mp);
1913         mp->mnt_lflag |= MNT_LDEAD;
1914
1915         if (mp->mnt_lflag & MNT_LWAIT) {
1916                 /*
1917                  * do the wakeup here
1918                  * in case we block in mount_refdrain
1919                  * which will drop the mount lock
1920                  * and allow anyone blocked in vfs_busy
1921                  * to wakeup and see the LDEAD state
1922                  */
1923                 mp->mnt_lflag &= ~MNT_LWAIT;
1924                 wakeup((caddr_t)mp);
1925         }
1926         mount_refdrain(mp);
             /*
              * Common exit.  The mount lock is held on arrival: every goto
              * above takes mount_lock() first and the success path took it
              * before setting MNT_LDEAD.  mnt_rwlock is held as well.
              */
1927 out:
1928         if (mp->mnt_lflag & MNT_LWAIT) {
1929                 mp->mnt_lflag &= ~MNT_LWAIT;
1930                 needwakeup = 1;
1931         }
1932
1933
1934 #if CONFIG_TRIGGERS
1935         /*
1936          * Callback and context are set together under the mount lock, and
1937          * never cleared, so we're safe to examine them here, drop the lock,
1938          * and call out.
1939          */
1940         if (mp->mnt_triggercallback != NULL) {
1941                 mount_unlock(mp);
1942                 if (error == 0) {
1943                         mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
1944                 } else if (did_vflush) {
1945                         mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
1946                 }
1947         } else {
1948                 mount_unlock(mp);
1949         }
1950 #else
1951         mount_unlock(mp);
1952 #endif /* CONFIG_TRIGGERS */
1953
1954         lck_rw_done(&mp->mnt_rwlock);
1955
             /* waiters noted at "out" are woken only after both locks are released */
1956         if (needwakeup)
1957                 wakeup((caddr_t)mp);
1958
1959         if (!error) {
1960                 if ((coveredvp != NULLVP)) {
1961                         vnode_t pvp;
1962
1963                         vnode_getwithref(coveredvp);
1964                         pvp = vnode_getparent(coveredvp);
1965                         vnode_rele(coveredvp);
1966
                             /*
                              * Drops the crossref taken above; mount_dropcrossref()
                              * frees mp when that was the last one and coveredvp
                              * no longer points at mp.
                              */
1967                         mount_dropcrossref(mp, coveredvp, 0);
1968 #if CONFIG_TRIGGERS
1969                         if (coveredvp->v_resolve)
1970                                 vnode_trigger_rearm(coveredvp, ctx);
1971 #endif
1972                         vnode_put(coveredvp);
1973
1974                         if (pvp) {
1975                                 lock_vnode_and_post(pvp, NOTE_WRITE);
1976                                 vnode_put(pvp);
1977                         }
1978                 } else if (mp->mnt_flag & MNT_ROOTFS) {
                                     /* no covered vnode: mp is torn down and freed right here */
1979                                 mount_lock_destroy(mp);
1980 #if CONFIG_MACF
1981                                 mac_mount_label_destroy(mp);
1982 #endif
1983                                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1984                 } else
1985                         panic("dounmount: no coveredvp");
1986         }
1987         return (error);
1988 }
1989
1990 void
1991 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
1992 {
1993         vnode_lock(dp);
1994         mp->mnt_crossref--;
1995
1996         if (mp->mnt_crossref < 0)
1997                 panic("mount cross refs -ve");
1998
1999         if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2000
2001                 if (need_put)
2002                         vnode_put_locked(dp);
2003                 vnode_unlock(dp);
2004
2005                 mount_lock_destroy(mp);
2006 #if CONFIG_MACF
2007                 mac_mount_label_destroy(mp);
2008 #endif
2009                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2010                 return;
2011         }
2012         if (need_put)
2013                 vnode_put_locked(dp);
2014         vnode_unlock(dp);
2015 }
2016
2017
2018 /*
2019  * Sync each mounted filesystem.
2020  */
2021 #if DIAGNOSTIC
     /* when non-zero, sync() calls vfs_bufstats() after the sync pass */
2022 int syncprt = 0;
2023 struct ctldebug debug0 = { "syncprt", &syncprt };
2024 #endif
2025
     /* when non-zero, sync() calls vm_countdirtypages() after the sync pass */
2026 int print_vmpage_stat=0;
2027
2028 static int
2029 sync_callback(mount_t mp, void * arg)
2030 {
2031         int asyncflag;
2032
2033         if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2034                         asyncflag = mp->mnt_flag & MNT_ASYNC;
2035                         mp->mnt_flag &= ~MNT_ASYNC;
2036                         VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current());
2037                         if (asyncflag)
2038                                 mp->mnt_flag |= MNT_ASYNC;
2039         }
2040         return(VFS_RETURNED);
2041 }
2042
2043
2044 #include <kern/clock.h>
2045
2046 clock_sec_t sync_wait_time = 0;
2047
2048 /* ARGSUSED */
2049 int
2050 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2051 {
2052         clock_nsec_t nsecs;
2053
2054         vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
2055
2056         {
2057                 static fsid_t fsid = { { 0, 0 } };
2058
2059                 clock_get_calendar_microtime(&sync_wait_time, &nsecs);
2060                 vfs_event_signal(&fsid, VQ_SYNCEVENT, (intptr_t)NULL);
2061                 wakeup((caddr_t)&sync_wait_time);
2062         }
2063
2064         {
2065         if(print_vmpage_stat) {
2066                 vm_countdirtypages();
2067         }
2068         }
2069 #if DIAGNOSTIC
2070         if (syncprt)
2071                 vfs_bufstats();
2072 #endif /* DIAGNOSTIC */
2073         return (0);
2074 }
2075
2076 /*
2077  * Change filesystem quotas.
2078  */
2079 #if QUOTA
2080 static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval);
2081
2082 int
2083 quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval)
2084 {
2085         boolean_t funnel_state;
2086         int error;
2087
2088         funnel_state = thread_funnel_set(kernel_flock, TRUE);
2089         error = quotactl_funneled(p, uap, retval);
2090         thread_funnel_set(kernel_flock, funnel_state);
2091         return(error);
2092 }
2093
2094 static int
2095 quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2096 {
2097         struct mount *mp;
2098         int error, quota_cmd, quota_status;
2099         caddr_t datap;
2100         size_t fnamelen;
2101         struct nameidata nd;
2102         vfs_context_t ctx = vfs_context_current();
2103         struct dqblk my_dqblk;
2104
2105         AUDIT_ARG(uid, uap->uid);
2106         AUDIT_ARG(cmd, uap->cmd);
2107         NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2108                uap->path, ctx);
2109         error = namei(&nd);
2110         if (error)
2111                 return (error);
2112         mp = nd.ni_vp->v_mount;
2113         vnode_put(nd.ni_vp);
2114         nameidone(&nd);
2115
2116         /* copyin any data we will need for downstream code */
2117         quota_cmd = uap->cmd >> SUBCMDSHIFT;
2118
2119         switch (quota_cmd) {
2120         case Q_QUOTAON:
2121                 /* uap->arg specifies a file from which to take the quotas */
2122                 fnamelen = MAXPATHLEN;
2123                 datap = kalloc(MAXPATHLEN);
2124                 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2125                 break;
2126         case Q_GETQUOTA:
2127                 /* uap->arg is a pointer to a dqblk structure. */
2128                 datap = (caddr_t) &my_dqblk;
2129                 break;
2130         case Q_SETQUOTA:
2131         case Q_SETUSE:
2132                 /* uap->arg is a pointer to a dqblk structure. */
2133                 datap = (caddr_t) &my_dqblk;
2134                 if (proc_is64bit(p)) {
2135                         struct user_dqblk       my_dqblk64;
2136                         error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2137                         if (error == 0) {
2138                                 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2139                         }
2140                 }
2141                 else {
2142                         error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2143                 }
2144                 break;
2145         case Q_QUOTASTAT:
2146                 /* uap->arg is a pointer to an integer */
2147                 datap = (caddr_t) &quota_status;
2148                 break;
2149         default:
2150                 datap = NULL;
2151                 break;
2152         } /* switch */
2153
2154         if (error == 0) {
2155                 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2156         }
2157
2158         switch (quota_cmd) {
2159         case Q_QUOTAON:
2160                 if (datap != NULL)
2161                         kfree(datap, MAXPATHLEN);
2162                 break;
2163         case Q_GETQUOTA:
2164                 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2165                 if (error == 0) {
2166                         if (proc_is64bit(p)) {
2167                                 struct user_dqblk       my_dqblk64;
2168                                 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2169                                 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2170                         }
2171                         else {
2172                                 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2173                         }
2174                 }
2175                 break;
2176         case Q_QUOTASTAT:
2177                 /* uap->arg is a pointer to an integer */
2178                 if (error == 0) {
2179                         error = copyout(datap, uap->arg, sizeof(quota_status));
2180                 }
2181                 break;
2182         default:
2183                 break;
2184         } /* switch */
2185
2186         return (error);
2187 }
2188 #else
2189 int
2190 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2191 {
2192         return (EOPNOTSUPP);
2193 }
2194 #endif /* QUOTA */
2195
2196 /*
2197  * Get filesystem statistics.
2198  *
2199  * Returns:     0                       Success
2200  *      namei:???
2201  *      vfs_update_vfsstat:???
2202  *      munge_statfs:EFAULT
2203  */
2204 /* ARGSUSED */
2205 int
2206 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2207 {
2208         struct mount *mp;
2209         struct vfsstatfs *sp;
2210         int error;
2211         struct nameidata nd;
2212         vfs_context_t ctx = vfs_context_current();
2213         vnode_t vp;
2214
2215         NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2216                 UIO_USERSPACE, uap->path, ctx);
2217         error = namei(&nd);
2218         if (error)
2219                 return (error);
2220         vp = nd.ni_vp;
2221         mp = vp->v_mount;
2222         sp = &mp->mnt_vfsstat;
2223         nameidone(&nd);
2224
2225         error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2226         vnode_put(vp);
2227         if (error != 0)
2228                 return (error);
2229
2230         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2231         return (error);
2232 }
2233
2234 /*
2235  * Get filesystem statistics.
2236  */
2237 /* ARGSUSED */
2238 int
2239 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2240 {
2241         vnode_t vp;
2242         struct mount *mp;
2243         struct vfsstatfs *sp;
2244         int error;
2245
2246         AUDIT_ARG(fd, uap->fd);
2247
2248         if ( (error = file_vnode(uap->fd, &vp)) )
2249                 return (error);
2250
2251         error = vnode_getwithref(vp);
2252         if (error) {
2253                 file_drop(uap->fd);
2254                 return (error);
2255         }
2256
2257         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2258
2259         mp = vp->v_mount;
2260         if (!mp) {
2261                 error = EBADF;
2262                 goto out;
2263         }
2264         sp = &mp->mnt_vfsstat;
2265         if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
2266                 goto out;
2267         }
2268
2269         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2270
2271 out:
2272         file_drop(uap->fd);
2273         vnode_put(vp);
2274
2275         return (error);
2276 }
2277
2278 /*
2279  * Common routine to handle copying of statfs64 data to user space
2280  */
2281 static int
2282 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2283 {
2284         int error;
2285         struct statfs64 sfs;
2286
2287         bzero(&sfs, sizeof(sfs));
2288
2289         sfs.f_bsize = sfsp->f_bsize;
2290         sfs.f_iosize = (int32_t)sfsp->f_iosize;
2291         sfs.f_blocks = sfsp->f_blocks;
2292         sfs.f_bfree = sfsp->f_bfree;
2293         sfs.f_bavail = sfsp->f_bavail;
2294         sfs.f_files = sfsp->f_files;
2295         sfs.f_ffree = sfsp->f_ffree;
2296         sfs.f_fsid = sfsp->f_fsid;
2297         sfs.f_owner = sfsp->f_owner;
2298         sfs.f_type = mp->mnt_vtable->vfc_typenum;
2299         sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2300         sfs.f_fssubtype = sfsp->f_fssubtype;
2301         if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2302                 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2303         } else {
2304                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2305         }
2306         strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2307         strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2308
2309         error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2310
2311         return(error);
2312 }
2313
2314 /*
2315  * Get file system statistics in 64-bit mode
2316  */
2317 int
2318 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2319 {
2320         struct mount *mp;
2321         struct vfsstatfs *sp;
2322         int error;
2323         struct nameidata nd;
2324         vfs_context_t ctxp = vfs_context_current();
2325         vnode_t vp;
2326
2327         NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2328                 UIO_USERSPACE, uap->path, ctxp);
2329         error = namei(&nd);
2330         if (error)
2331                 return (error);
2332         vp = nd.ni_vp;
2333         mp = vp->v_mount;
2334         sp = &mp->mnt_vfsstat;
2335         nameidone(&nd);
2336
2337         error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2338         vnode_put(vp);
2339         if (error != 0)
2340                 return (error);
2341
2342         error = statfs64_common(mp, sp, uap->buf);
2343
2344         return (error);
2345 }
2346
2347 /*
2348  * Get file system statistics in 64-bit mode
2349  */
2350 int
2351 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2352 {
2353         struct vnode *vp;
2354         struct mount *mp;
2355         struct vfsstatfs *sp;
2356         int error;
2357
2358         AUDIT_ARG(fd, uap->fd);
2359
2360         if ( (error = file_vnode(uap->fd, &vp)) )
2361                 return (error);
2362
2363         error = vnode_getwithref(vp);
2364         if (error) {
2365                 file_drop(uap->fd);
2366                 return (error);
2367         }
2368
2369         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2370
2371         mp = vp->v_mount;
2372         if (!mp) {
2373                 error = EBADF;;
2374                 goto out;
2375         }
2376         sp = &mp->mnt_vfsstat;
2377         if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2378                 goto out;
2379         }
2380
2381         error = statfs64_common(mp, sp, uap->buf);
2382
2383 out:
2384         file_drop(uap->fd);
2385         vnode_put(vp);
2386
2387         return (error);
2388 }
2389
     /*
      * State shared between __mac_getfsstat() and its vfs_iterate() callback
      * while the list of mounted filesystems is walked.
      */
2390 struct getfsstat_struct {
2391         user_addr_t     sfsp;           /* user address for the next statfs record; 0 = just count */
2392         user_addr_t     *mp;            /* next MAC label user address to fill, or NULL if none */
2393         int             count;          /* mounts counted so far; may exceed maxcount */
2394         int             maxcount;       /* number of records that fit in the user buffer */
2395         int             flags;          /* caller's MNT_NOWAIT / MNT_WAIT / MNT_DWAIT flags */
2396         int             error;          /* error that stopped the iteration, 0 if none */
2397 };
2398
2399
2400 static int
2401 getfsstat_callback(mount_t mp, void * arg)
2402 {
2403
2404         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2405         struct vfsstatfs *sp;
2406         int error, my_size;
2407         vfs_context_t ctx = vfs_context_current();
2408
2409         if (fstp->sfsp && fstp->count < fstp->maxcount) {
2410                 sp = &mp->mnt_vfsstat;
2411                 /*
2412                  * If MNT_NOWAIT is specified, do not refresh the
2413                  * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2414                  */
2415                 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2416                         (error = vfs_update_vfsstat(mp, ctx,
2417                             VFS_USER_EVENT))) {
2418                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2419                         return(VFS_RETURNED);
2420                 }
2421
2422                 /*
2423                  * Need to handle LP64 version of struct statfs
2424                  */
2425                 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2426                 if (error) {
2427                         fstp->error = error;
2428                         return(VFS_RETURNED_DONE);
2429                 }
2430                 fstp->sfsp += my_size;
2431
2432                 if (fstp->mp) {
2433                         error = mac_mount_label_get(mp, *fstp->mp);
2434                         if (error) {
2435                                 fstp->error = error;
2436                                 return(VFS_RETURNED_DONE);
2437                         }
2438                         fstp->mp++;
2439                 }
2440         }
2441         fstp->count++;
2442         return(VFS_RETURNED);
2443 }
2444
2445 /*
2446  * Get statistics on all filesystems.
2447  */
2448 int
2449 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2450 {
2451         struct __mac_getfsstat_args muap;
2452
2453         muap.buf = uap->buf;
2454         muap.bufsize = uap->bufsize;
2455         muap.mac = USER_ADDR_NULL;
2456         muap.macsize = 0;
2457         muap.flags = uap->flags;
2458
2459         return (__mac_getfsstat(p, &muap, retval));
2460 }
2461
2462 /*
2463  * __mac_getfsstat: Get MAC-related file system statistics
2464  *
2465  * Parameters:    p                        (ignored)
2466  *                uap                      User argument descriptor (see below)
2467  *                retval                   Count of file system statistics (N stats)
2468  *
2469  * Indirect:      uap->bufsize             Buffer size
2470  *                uap->macsize             MAC info size
2471  *                uap->buf                 Buffer where information will be returned
2472  *                uap->mac                 MAC info
2473  *                uap->flags               File system flags
2474  *
2475  *
2476  * Returns:        0                       Success
2477  *                !0                       Not success
2478  *
2479  */
2480 int
2481 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2482 {
2483         user_addr_t sfsp;
2484         user_addr_t *mp;
2485         size_t count, maxcount, bufsize, macsize;
2486         struct getfsstat_struct fst;
2487
2488         bufsize = (size_t) uap->bufsize;
2489         macsize = (size_t) uap->macsize;
2490
2491         if (IS_64BIT_PROCESS(p)) {
2492                 maxcount = bufsize / sizeof(struct user64_statfs);
2493         }
2494         else {
2495                 maxcount = bufsize / sizeof(struct user32_statfs);
2496         }
2497         sfsp = uap->buf;
2498         count = 0;
2499
2500         mp = NULL;
2501
2502 #if CONFIG_MACF
2503         if (uap->mac != USER_ADDR_NULL) {
2504                 u_int32_t *mp0;
2505                 int error;
2506                 unsigned int i;
2507
2508                 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2509                 if (count != maxcount)
2510                         return (EINVAL);
2511
2512                 /* Copy in the array */
2513                 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2514                 if (mp0 == NULL) {
2515                         return (ENOMEM);
2516                 }
2517
2518                 error = copyin(uap->mac, mp0, macsize);
2519                 if (error) {
2520                         FREE(mp0, M_MACTEMP);
2521                         return (error);
2522                 }
2523
2524                 /* Normalize to an array of user_addr_t */
2525                 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2526                 if (mp == NULL) {
2527                         FREE(mp0, M_MACTEMP);
2528                         return (ENOMEM);
2529                 }
2530
2531                 for (i = 0; i < count; i++) {
2532                         if (IS_64BIT_PROCESS(p))
2533                                 mp[i] = ((user_addr_t *)mp0)[i];
2534                         else
2535                                 mp[i] = (user_addr_t)mp0[i];
2536                 }
2537                 FREE(mp0, M_MACTEMP);
2538         }
2539 #endif
2540
2541
2542         fst.sfsp = sfsp;
2543         fst.mp = mp;
2544         fst.flags = uap->flags;
2545         fst.count = 0;
2546         fst.error = 0;
2547         fst.maxcount = maxcount;
2548
2549
2550         vfs_iterate(0, getfsstat_callback, &fst);
2551
2552         if (mp)
2553                 FREE(mp, M_MACTEMP);
2554
2555         if (fst.error ) {
2556                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2557                 return(fst.error);
2558         }
2559
2560         if (fst.sfsp && fst.count > fst.maxcount)
2561                 *retval = fst.maxcount;
2562         else
2563                 *retval = fst.count;
2564         return (0);
2565 }
2566
2567 static int
2568 getfsstat64_callback(mount_t mp, void * arg)
2569 {
2570         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2571         struct vfsstatfs *sp;
2572         int error;
2573
2574         if (fstp->sfsp && fstp->count < fstp->maxcount) {
2575                 sp = &mp->mnt_vfsstat;
2576                 /*
2577                  * If MNT_NOWAIT is specified, do not refresh the fsstat
2578                  * cache. MNT_WAIT overrides MNT_NOWAIT.
2579                  *
2580                  * We treat MNT_DWAIT as MNT_WAIT for all instances of
2581                  * getfsstat, since the constants are out of the same
2582                  * namespace.
2583                  */
2584                 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2585                      (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2586                     (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2587                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2588                         return(VFS_RETURNED);
2589                 }
2590
2591                 error = statfs64_common(mp, sp, fstp->sfsp);
2592                 if (error) {
2593                         fstp->error = error;
2594                         return(VFS_RETURNED_DONE);
2595                 }
2596                 fstp->sfsp += sizeof(struct statfs64);
2597         }
2598         fstp->count++;
2599         return(VFS_RETURNED);
2600 }
2601
2602 /*
2603  * Get statistics on all file systems in 64 bit mode.
2604  */
2605 int
2606 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2607 {
2608         user_addr_t sfsp;
2609         int count, maxcount;
2610         struct getfsstat_struct fst;
2611
2612         maxcount = uap->bufsize / sizeof(struct statfs64);
2613
2614         sfsp = uap->buf;
2615         count = 0;
2616
2617         fst.sfsp = sfsp;
2618         fst.flags = uap->flags;
2619         fst.count = 0;
2620         fst.error = 0;
2621         fst.maxcount = maxcount;
2622
2623         vfs_iterate(0, getfsstat64_callback, &fst);
2624
2625         if (fst.error ) {
2626                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2627                 return(fst.error);
2628         }
2629
2630         if (fst.sfsp && fst.count > fst.maxcount)
2631                 *retval = fst.maxcount;
2632         else
2633                 *retval = fst.count;
2634
2635         return (0);
2636 }
2637
2638 /*
2639  * Change current working directory to a given file descriptor.
2640  */
2641 /* ARGSUSED */
2642 static int
2643 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
2644 {
2645         struct filedesc *fdp = p->p_fd;
2646         vnode_t vp;
2647         vnode_t tdp;
2648         vnode_t tvp;
2649         struct mount *mp;
2650         int error;
2651         vfs_context_t ctx = vfs_context_current();
2652
2653         AUDIT_ARG(fd, uap->fd);
2654         if (per_thread && uap->fd == -1) {
2655                 /*
2656                  * Switching back from per-thread to per process CWD; verify we
2657                  * in fact have one before proceeding.  The only success case
2658                  * for this code path is to return 0 preemptively after zapping
2659                  * the thread structure contents.
2660                  */
2661                 thread_t th = vfs_context_thread(ctx);
2662                 if (th) {
2663                         uthread_t uth = get_bsdthread_info(th);
2664                         tvp = uth->uu_cdir;
2665                         uth->uu_cdir = NULLVP;
2666                         if (tvp != NULLVP) {
2667                                 vnode_rele(tvp);
2668                                 return (0);
2669                         }
2670                 }
2671                 return (EBADF);
2672         }
2673
2674         if ( (error = file_vnode(uap->fd, &vp)) )
2675                 return(error);
2676         if ( (error = vnode_getwithref(vp)) ) {
2677                 file_drop(uap->fd);
2678                 return(error);
2679         }
2680
2681         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2682
2683         if (vp->v_type != VDIR) {
2684                 error = ENOTDIR;
2685                 goto out;
2686         }
2687
2688 #if CONFIG_MACF
2689         error = mac_vnode_check_chdir(ctx, vp);
2690         if (error)
2691                 goto out;
2692 #endif
2693         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2694         if (error)
2695                 goto out;
2696
2697         while (!error && (mp = vp->v_mountedhere) != NULL) {
2698                 if (vfs_busy(mp, LK_NOWAIT)) {
2699                         error = EACCES;
2700                         goto out;
2701                 }
2702                 error = VFS_ROOT(mp, &tdp, ctx);
2703                 vfs_unbusy(mp);
2704                 if (error)
2705                         break;
2706                 vnode_put(vp);
2707                 vp = tdp;
2708         }
2709         if (error)
2710                 goto out;
2711         if ( (error = vnode_ref(vp)) )
2712                 goto out;
2713         vnode_put(vp);
2714
2715         if (per_thread) {
2716                 thread_t th = vfs_context_thread(ctx);
2717                 if (th) {
2718                         uthread_t uth = get_bsdthread_info(th);
2719                         tvp = uth->uu_cdir;
2720                         uth->uu_cdir = vp;
2721                         OSBitOrAtomic(P_THCWD, &p->p_flag);
2722                 } else {
2723                         vnode_rele(vp);
2724                         return (ENOENT);
2725                 }
2726         } else {
2727                 proc_fdlock(p);
2728                 tvp = fdp->fd_cdir;
2729                 fdp->fd_cdir = vp;
2730                 proc_fdunlock(p);
2731         }
2732
2733         if (tvp)
2734                 vnode_rele(tvp);
2735         file_drop(uap->fd);
2736
2737         return (0);
2738 out:
2739         vnode_put(vp);
2740         file_drop(uap->fd);
2741
2742         return(error);
2743 }
2744
2745 int
2746 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2747 {
2748         return common_fchdir(p, uap, 0);
2749 }
2750
2751 int
2752 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2753 {
2754         return common_fchdir(p, (void *)uap, 1);
2755 }
2756
2757 /*
2758  * Change current working directory (".").
2759  *
2760  * Returns:     0                       Success
2761  *      change_dir:ENOTDIR
2762  *      change_dir:???
2763  *      vnode_ref:ENOENT                No such file or directory
2764  */
2765 /* ARGSUSED */
2766 static int
2767 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
2768 {
2769         struct filedesc *fdp = p->p_fd;
2770         int error;
2771         struct nameidata nd;
2772         vnode_t tvp;
2773         vfs_context_t ctx = vfs_context_current();
2774
2775         NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2776                 UIO_USERSPACE, uap->path, ctx);
2777         error = change_dir(&nd, ctx);
2778         if (error)
2779                 return (error);
2780         if ( (error = vnode_ref(nd.ni_vp)) ) {
2781                 vnode_put(nd.ni_vp);
2782                 return (error);
2783         }
2784         /*
2785          * drop the iocount we picked up in change_dir
2786          */
2787         vnode_put(nd.ni_vp);
2788
2789         if (per_thread) {
2790                 thread_t th = vfs_context_thread(ctx);
2791                 if (th) {
2792                         uthread_t uth = get_bsdthread_info(th);
2793                         tvp = uth->uu_cdir;
2794                         uth->uu_cdir = nd.ni_vp;
2795                         OSBitOrAtomic(P_THCWD, &p->p_flag);
2796                 } else {
2797                         vnode_rele(nd.ni_vp);
2798                         return (ENOENT);
2799                 }
2800         } else {
2801                 proc_fdlock(p);
2802                 tvp = fdp->fd_cdir;
2803                 fdp->fd_cdir = nd.ni_vp;
2804                 proc_fdunlock(p);
2805         }
2806
2807         if (tvp)
2808                 vnode_rele(tvp);
2809
2810         return (0);
2811 }
2812
2813
2814 /*
2815  * chdir
2816  *
2817  * Change current working directory (".") for the entire process
2818  *
2819  * Parameters:  p       Process requesting the call
2820  *              uap     User argument descriptor (see below)
2821  *              retval  (ignored)
2822  *
2823  * Indirect parameters: uap->path       Directory path
2824  *
2825  * Returns:     0                       Success
2826  *              common_chdir: ENOTDIR
2827  *              common_chdir: ENOENT    No such file or directory
2828  *              common_chdir: ???
2829  *
2830  */
2831 int
2832 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2833 {
2834         return common_chdir(p, (void *)uap, 0);
2835 }
2836
2837 /*
2838  * __pthread_chdir
2839  *
2840  * Change current working directory (".") for a single thread
2841  *
2842  * Parameters:  p       Process requesting the call
2843  *              uap     User argument descriptor (see below)
2844  *              retval  (ignored)
2845  *
2846  * Indirect parameters: uap->path       Directory path
2847  *
2848  * Returns:     0                       Success
2849  *              common_chdir: ENOTDIR
2850  *              common_chdir: ENOENT    No such file or directory
2851  *              common_chdir: ???
2852  *
2853  */
2854 int
2855 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2856 {
2857         return common_chdir(p, (void *)uap, 1);
2858 }
2859
2860
2861 /*
2862  * Change notion of root (``/'') directory.
2863  */
2864 /* ARGSUSED */
2865 int
2866 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
2867 {
2868         struct filedesc *fdp = p->p_fd;
2869         int error;
2870         struct nameidata nd;
2871         vnode_t tvp;
2872         vfs_context_t ctx = vfs_context_current();
2873
2874         if ((error = suser(kauth_cred_get(), &p->p_acflag)))
2875                 return (error);
2876
2877         NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2878                 UIO_USERSPACE, uap->path, ctx);
2879         error = change_dir(&nd, ctx);
2880         if (error)
2881                 return (error);
2882
2883 #if CONFIG_MACF
2884         error = mac_vnode_check_chroot(ctx, nd.ni_vp,
2885             &nd.ni_cnd);
2886         if (error) {
2887                 vnode_put(nd.ni_vp);
2888                 return (error);
2889         }
2890 #endif
2891
2892         if ( (error = vnode_ref(nd.ni_vp)) ) {
2893                 vnode_put(nd.ni_vp);
2894                 return (error);
2895         }
2896         vnode_put(nd.ni_vp);
2897
2898         proc_fdlock(p);
2899         tvp = fdp->fd_rdir;
2900         fdp->fd_rdir = nd.ni_vp;
2901         fdp->fd_flags |= FD_CHROOT;
2902         proc_fdunlock(p);
2903
2904         if (tvp != NULL)
2905                 vnode_rele(tvp);
2906
2907         return (0);
2908 }
2909
2910 /*
2911  * Common routine for chroot and chdir.
2912  *
2913  * Returns:     0                       Success
2914  *              ENOTDIR                 Not a directory
2915  *              namei:???               [anything namei can return]
2916  *              vnode_authorize:???     [anything vnode_authorize can return]
2917  */
2918 static int
2919 change_dir(struct nameidata *ndp, vfs_context_t ctx)
2920 {
2921         vnode_t vp;
2922         int error;
2923
2924         if ((error = namei(ndp)))
2925                 return (error);
2926         nameidone(ndp);
2927         vp = ndp->ni_vp;
2928
2929         if (vp->v_type != VDIR) {
2930                 vnode_put(vp);
2931                 return (ENOTDIR);
2932         }
2933
2934 #if CONFIG_MACF
2935         error = mac_vnode_check_chdir(ctx, vp);
2936         if (error) {
2937                 vnode_put(vp);
2938                 return (error);
2939         }
2940 #endif
2941
2942         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2943         if (error) {
2944                 vnode_put(vp);
2945                 return (error);
2946         }
2947
2948         return (error);
2949 }
2950
2951 /*
2952  * Check permissions, allocate an open file structure,
2953  * and call the device open routine if any.
2954  *
2955  * Returns:     0                       Success
2956  *              EINVAL
2957  *              EINTR
2958  *      falloc:ENFILE
2959  *      falloc:EMFILE
2960  *      falloc:ENOMEM
2961  *      vn_open_auth:???
2962  *      dupfdopen:???
2963  *      VNOP_ADVLOCK:???
2964  *      vnode_setsize:???
2965  *
2966  * XXX Need to implement uid, gid
2967  */
2968 int
2969 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, int32_t *retval)
2970 {
2971         proc_t p = vfs_context_proc(ctx);
2972         uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2973         struct filedesc *fdp = p->p_fd;
2974         struct fileproc *fp;
2975         vnode_t vp;
2976         int flags, oflags;
2977         struct fileproc *nfp;
2978         int type, indx, error;
2979         struct flock lf;
2980         int no_controlling_tty = 0;
2981         int deny_controlling_tty = 0;
2982         struct session *sessp = SESSION_NULL;
2983         struct vfs_context context = *vfs_context_current();    /* local copy */
2984
2985         oflags = uflags;
2986
2987         if ((oflags & O_ACCMODE) == O_ACCMODE)
2988                 return(EINVAL);
2989         flags = FFLAGS(uflags);
2990
2991         AUDIT_ARG(fflags, oflags);
2992         AUDIT_ARG(mode, vap->va_mode);
2993
2994         if ( (error = falloc(p, &nfp, &indx, ctx)) ) {
2995                 return (error);
2996         }
2997         fp = nfp;
2998         uu->uu_dupfd = -indx - 1;
2999
3000         if (!(p->p_flag & P_CONTROLT)) {
3001                 sessp = proc_session(p);
3002                 no_controlling_tty = 1;
3003                 /*
3004                  * If conditions would warrant getting a controlling tty if
3005                  * the device being opened is a tty (see ttyopen in tty.c),
3006                  * but the open flags deny it, set a flag in the session to
3007                  * prevent it.
3008                  */
3009                 if (SESS_LEADER(p, sessp) &&
3010                     sessp->s_ttyvp == NULL &&
3011                     (flags & O_NOCTTY)) {
3012                         session_lock(sessp);
3013                         sessp->s_flags |= S_NOCTTY;
3014                         session_unlock(sessp);
3015                         deny_controlling_tty = 1;
3016                 }
3017         }
3018
3019         if ((error = vn_open_auth(ndp, &flags, vap))) {
3020                 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){        /* XXX from fdopen */
3021                         if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) {
3022                                 fp_drop(p, indx, NULL, 0);
3023                                 *retval = indx;
3024                                 if (deny_controlling_tty) {
3025                                         session_lock(sessp);
3026                                         sessp->s_flags &= ~S_NOCTTY;
3027                                         session_unlock(sessp);
3028                                 }
3029                                 if (sessp != SESSION_NULL)
3030                                         session_rele(sessp);
3031                                 return (0);
3032                         }
3033                 }
3034                 if (error == ERESTART)
3035                         error = EINTR;
3036                 fp_free(p, indx, fp);
3037
3038                 if (deny_controlling_tty) {
3039                         session_lock(sessp);
3040                         sessp->s_flags &= ~S_NOCTTY;
3041                         session_unlock(sessp);
3042                 }
3043                 if (sessp != SESSION_NULL)
3044                         session_rele(sessp);
3045                 return (error);
3046         }
3047         uu->uu_dupfd = 0;
3048         vp = ndp->ni_vp;
3049
3050         fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
3051         fp->f_fglob->fg_type = DTYPE_VNODE;
3052         fp->f_fglob->fg_ops = &vnops;
3053         fp->f_fglob->fg_data = (caddr_t)vp;
3054
3055         if (flags & (O_EXLOCK | O_SHLOCK)) {
3056                 lf.l_whence = SEEK_SET;
3057                 lf.l_start = 0;
3058                 lf.l_len = 0;
3059                 if (flags & O_EXLOCK)
3060                         lf.l_type = F_WRLCK;
3061                 else
3062                         lf.l_type = F_RDLCK;
3063                 type = F_FLOCK;
3064                 if ((flags & FNONBLOCK) == 0)
3065                         type |= F_WAIT;
3066 #if CONFIG_MACF
3067                 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3068                     F_SETLK, &lf);
3069                 if (error)
3070                         goto bad;
3071 #endif
3072                 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx)))
3073                         goto bad;
3074                 fp->f_fglob->fg_flag |= FHASLOCK;
3075         }
3076
3077         /* try to truncate by setting the size attribute */
3078         if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3079                 goto bad;
3080
3081         /*
3082          * If the open flags denied the acquisition of a controlling tty,
3083          * clear the flag in the session structure that prevented the lower
3084          * level code from assigning one.
3085          */
3086         if (deny_controlling_tty) {
3087                 session_lock(sessp);
3088                 sessp->s_flags &= ~S_NOCTTY;
3089                 session_unlock(sessp);
3090         }
3091
3092         /*
3093          * If a controlling tty was set by the tty line discipline, then we
3094          * want to set the vp of the tty into the session structure.  We have
3095          * a race here because we can't get to the vp for the tp in ttyopen,
3096          * because it's not passed as a parameter in the open path.
3097          */
3098         if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
3099                 vnode_t ttyvp;
3100
3101                 /*
3102                  * We already have a ref from vn_open_auth(), so we can demand another reference.
3103                  */
3104                 error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
3105                 if (error != 0) {
3106                         panic("vnode_ref_ext() with VNODE_REF_FORCE failed?!");
3107                 }
3108
3109                 session_lock(sessp);
3110                 ttyvp = sessp->s_ttyvp;
3111                 sessp->s_ttyvp = vp;
3112                 sessp->s_ttyvid = vnode_vid(vp);
3113                 session_unlock(sessp);
3114                 if (ttyvp != NULLVP)
3115                         vnode_rele(ttyvp);
3116         }
3117
3118         vnode_put(vp);
3119
3120         proc_fdlock(p);
3121         if (flags & O_CLOEXEC)
3122                 *fdflags(p, indx) |= UF_EXCLOSE;
3123         procfdtbl_releasefd(p, indx, NULL);
3124         fp_drop(p, indx, fp, 1);
3125         proc_fdunlock(p);
3126
3127         *retval = indx;
3128
3129         if (sessp != SESSION_NULL)
3130                 session_rele(sessp);
3131         return (0);
3132 bad:
3133         if (deny_controlling_tty) {
3134                 session_lock(sessp);
3135                 sessp->s_flags &= ~S_NOCTTY;
3136                 session_unlock(sessp);
3137         }
3138         if (sessp != SESSION_NULL)
3139                 session_rele(sessp);
3140
3141         /* Modify local copy (to not damage thread copy) */
3142         context.vc_ucred = fp->f_fglob->fg_cred;
3143
3144         vn_close(vp, fp->f_fglob->fg_flag, &context);
3145         vnode_put(vp);
3146         fp_free(p, indx, fp);
3147
3148         return (error);
3149
3150 }
3151
3152 /*
3153  * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3154  *
3155  * Parameters:  p                       Process requesting the open
3156  *              uap                     User argument descriptor (see below)
3157  *              retval                  Pointer to an area to receive the
3158  *                                      return calue from the system call
3159  *
3160  * Indirect:    uap->path               Path to open (same as 'open')
3161  *              uap->flags              Flags to open (same as 'open'
3162  *              uap->uid                UID to set, if creating
3163  *              uap->gid                GID to set, if creating
3164  *              uap->mode               File mode, if creating (same as 'open')
3165  *              uap->xsecurity          ACL to set, if creating
3166  *
3167  * Returns:     0                       Success
3168  *              !0                      errno value
3169  *
3170  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
3171  *
3172  * XXX:         We should enummerate the possible errno values here, and where
3173  *              in the code they originated.
3174  */
3175 int
3176 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3177 {
3178         struct filedesc *fdp = p->p_fd;
3179         int ciferror;
3180         kauth_filesec_t xsecdst;
3181         struct vnode_attr va;
3182         struct nameidata nd;
3183         int cmode;
3184
3185         AUDIT_ARG(owner, uap->uid, uap->gid);
3186
3187         xsecdst = NULL;
3188         if ((uap->xsecurity != USER_ADDR_NULL) &&
3189             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3190                 return ciferror;
3191
3192         VATTR_INIT(&va);
3193         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3194         VATTR_SET(&va, va_mode, cmode);
3195         if (uap->uid != KAUTH_UID_NONE)
3196                 VATTR_SET(&va, va_uid, uap->uid);
3197         if (uap->gid != KAUTH_GID_NONE)
3198                 VATTR_SET(&va, va_gid, uap->gid);
3199         if (xsecdst != NULL)
3200                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3201
3202         NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3203                uap->path, vfs_context_current());
3204
3205         ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
3206         if (xsecdst != NULL)
3207                 kauth_filesec_free(xsecdst);
3208
3209         return ciferror;
3210 }
3211
3212 int
3213 open(proc_t p, struct open_args *uap, int32_t *retval)
3214 {
3215         __pthread_testcancel(1);
3216         return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3217 }
3218
3219 int
3220 open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
3221 {
3222         struct filedesc *fdp = p->p_fd;
3223         struct vnode_attr va;
3224         struct nameidata nd;
3225         int cmode;
3226
3227         VATTR_INIT(&va);
3228         /* Mask off all but regular access permissions */
3229         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3230         VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3231
3232         NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3233                uap->path, vfs_context_current());
3234
3235         return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
3236 }
3237
3238
3239 /*
3240  * Create a special file.
3241  */
3242 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3243
3244 int
3245 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3246 {
3247         struct vnode_attr va;
3248         vfs_context_t ctx = vfs_context_current();
3249         int error;
3250         struct nameidata nd;
3251         vnode_t vp, dvp;
3252
3253         VATTR_INIT(&va);
3254         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3255         VATTR_SET(&va, va_rdev, uap->dev);
3256
3257         /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3258         if ((uap->mode & S_IFMT) == S_IFIFO)
3259                 return(mkfifo1(ctx, uap->path, &va));
3260
3261         AUDIT_ARG(mode, uap->mode);
3262         AUDIT_ARG(value32, uap->dev);
3263
3264         if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
3265                 return (error);
3266         NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
3267                 UIO_USERSPACE, uap->path, ctx);
3268         error = namei(&nd);
3269         if (error)
3270                 return (error);
3271         dvp = nd.ni_dvp;
3272         vp = nd.ni_vp;
3273
3274         if (vp != NULL) {
3275                 error = EEXIST;
3276                 goto out;
3277         }
3278
3279         switch (uap->mode & S_IFMT) {
3280         case S_IFMT:    /* used by badsect to flag bad sectors */
3281                 VATTR_SET(&va, va_type, VBAD);
3282                 break;
3283         case S_IFCHR:
3284                 VATTR_SET(&va, va_type, VCHR);
3285                 break;
3286         case S_IFBLK:
3287                 VATTR_SET(&va, va_type, VBLK);
3288                 break;
3289         default:
3290                 error = EINVAL;
3291                 goto out;
3292         }
3293
3294 #if CONFIG_MACF
3295         error = mac_vnode_check_create(ctx,
3296             nd.ni_dvp, &nd.ni_cnd, &va);
3297         if (error)
3298                 goto out;
3299 #endif
3300
3301         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3302                 goto out;
3303
3304         if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
3305                 goto out;
3306
3307         if (vp) {
3308                 int     update_flags = 0;
3309
3310                 // Make sure the name & parent pointers are hooked up
3311                 if (vp->v_name == NULL)
3312                         update_flags |= VNODE_UPDATE_NAME;
3313                 if (vp->v_parent == NULLVP)
3314                         update_flags |= VNODE_UPDATE_PARENT;
3315
3316                 if (update_flags)
3317                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3318
3319 #if CONFIG_FSE
3320                 add_fsevent(FSE_CREATE_FILE, ctx,
3321                     FSE_ARG_VNODE, vp,
3322                     FSE_ARG_DONE);
3323 #endif
3324         }
3325
3326 out:
3327         /*
3328          * nameidone has to happen before we vnode_put(dvp)
3329          * since it may need to release the fs_nodelock on the dvp
3330          */
3331         nameidone(&nd);
3332
3333         if (vp)
3334                 vnode_put(vp);
3335         vnode_put(dvp);
3336
3337         return (error);
3338 }
3339
3340 /*
3341  * Create a named pipe.
3342  *
3343  * Returns:     0                       Success
3344  *              EEXIST
3345  *      namei:???
3346  *      vnode_authorize:???
3347  *      vn_create:???
3348  */
3349 static int
3350 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
3351 {
3352         vnode_t vp, dvp;
3353         int error;
3354         struct nameidata nd;
3355
3356         NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
3357                 UIO_USERSPACE, upath, ctx);
3358         error = namei(&nd);
3359         if (error)
3360                 return (error);
3361         dvp = nd.ni_dvp;
3362         vp = nd.ni_vp;
3363
3364         /* check that this is a new file and authorize addition */
3365         if (vp != NULL) {
3366                 error = EEXIST;
3367                 goto out;
3368         }
3369         VATTR_SET(vap, va_type, VFIFO);
3370
3371         if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
3372                 goto out;
3373
3374         error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
3375 out:
3376         /*
3377          * nameidone has to happen before we vnode_put(dvp)
3378          * since it may need to release the fs_nodelock on the dvp
3379          */
3380         nameidone(&nd);
3381
3382         if (vp)
3383                 vnode_put(vp);
3384         vnode_put(dvp);
3385
3386         return error;
3387 }
3388
3389
3390 /*
3391  * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3392  *
3393  * Parameters:  p                       Process requesting the open
3394  *              uap                     User argument descriptor (see below)
3395  *              retval                  (Ignored)
3396  *
3397  * Indirect:    uap->path               Path to fifo (same as 'mkfifo')
3398  *              uap->uid                UID to set
3399  *              uap->gid                GID to set
3400  *              uap->mode               File mode to set (same as 'mkfifo')
3401  *              uap->xsecurity          ACL to set, if creating
3402  *
3403  * Returns:     0                       Success
3404  *              !0                      errno value
3405  *
3406  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
3407  *
3408  * XXX:         We should enummerate the possible errno values here, and where
3409  *              in the code they originated.
3410  */
3411 int
3412 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3413 {
3414         int ciferror;
3415         kauth_filesec_t xsecdst;
3416         struct vnode_attr va;
3417
3418         AUDIT_ARG(owner, uap->uid, uap->gid);
3419
3420         xsecdst = KAUTH_FILESEC_NONE;
3421         if (uap->xsecurity != USER_ADDR_NULL) {
3422                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3423                         return ciferror;
3424         }
3425
3426         VATTR_INIT(&va);
3427         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3428         if (uap->uid != KAUTH_UID_NONE)
3429                 VATTR_SET(&va, va_uid, uap->uid);
3430         if (uap->gid != KAUTH_GID_NONE)
3431                 VATTR_SET(&va, va_gid, uap->gid);
3432         if (xsecdst != KAUTH_FILESEC_NONE)
3433                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3434
3435         ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3436
3437         if (xsecdst != KAUTH_FILESEC_NONE)
3438                 kauth_filesec_free(xsecdst);
3439         return ciferror;
3440 }
3441
3442 /* ARGSUSED */
3443 int
3444 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3445 {
3446         struct vnode_attr va;
3447
3448         VATTR_INIT(&va);
3449         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3450
3451         return(mkfifo1(vfs_context_current(), uap->path, &va));
3452 }
3453
3454
/*
 * my_strrchr: locate the last occurrence of a character in a C string.
 *
 * Parameters:	p			NUL-terminated string to scan
 *		ch			Character to look for; converted to
 *					char before comparing, as strrchr()
 *					does, so values outside the range of
 *					plain char (e.g. 0xE9 where char is
 *					signed) still match
 *
 * Returns:	Pointer to the last matching byte, or NULL when there is no
 *		match.  Searching for '\0' yields a pointer to the terminator.
 */
static char *
my_strrchr(char *p, int ch)
{
	const char wanted = (char)ch;
	char *last = NULL;

	for (;; ++p) {
		/* test for a match first so the terminator itself can match */
		if (*p == wanted)
			last = p;
		if (*p == '\0')
			return (last);
	}
	/* NOTREACHED */
}
3468
3469 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3470
3471 int
3472 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
3473 {
3474         int ret, len = _len;
3475
3476         *truncated_path = 0;
3477         ret = vn_getpath(dvp, path, &len);
3478         if (ret == 0 && len < (MAXPATHLEN - 1)) {
3479                 if (leafname) {
3480                         path[len-1] = '/';
3481                         len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
3482                         if (len > MAXPATHLEN) {
3483                                 char *ptr;
3484
3485                                 // the string got truncated!
3486                                 *truncated_path = 1;
3487                                 ptr = my_strrchr(path, '/');
3488                                 if (ptr) {
3489                                         *ptr = '\0';   // chop off the string at the last directory component
3490                                 }
3491                                 len = strlen(path) + 1;
3492                         }
3493                 }
3494         } else if (ret == 0) {
3495                 *truncated_path = 1;
3496         } else if (ret != 0) {
3497                 struct vnode *mydvp=dvp;
3498
3499                 if (ret != ENOSPC) {
3500                         printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
3501                                dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
3502                 }
3503                 *truncated_path = 1;
3504
3505                 do {
3506                         if (mydvp->v_parent != NULL) {
3507                                 mydvp = mydvp->v_parent;
3508                         } else if (mydvp->v_mount) {
3509                                 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
3510                                 break;
3511                         } else {
3512                                 // no parent and no mount point?  only thing is to punt and say "/" changed
3513                                 strlcpy(path, "/", _len);
3514                                 len = 2;
3515                                 mydvp = NULL;
3516                         }
3517
3518                         if (mydvp == NULL) {
3519                                 break;
3520                         }
3521
3522                         len = _len;
3523                         ret = vn_getpath(mydvp, path, &len);
3524                 } while (ret == ENOSPC);
3525         }
3526
3527         return len;
3528 }
3529
3530
3531 /*
3532  * Make a hard file link.
3533  *
3534  * Returns:     0                       Success
3535  *              EPERM
3536  *              EEXIST
3537  *              EXDEV
3538  *      namei:???
3539  *      vnode_authorize:???
3540  *      VNOP_LINK:???
3541  */
3542 /* ARGSUSED */
3543 int
3544 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
3545 {
3546         vnode_t vp, dvp, lvp;
3547         struct nameidata nd;
3548         vfs_context_t ctx = vfs_context_current();
3549         int error;
3550 #if CONFIG_FSE
3551         fse_info finfo;
3552 #endif
3553         int need_event, has_listeners;
3554         char *target_path = NULL;
3555         int truncated=0;
3556
3557         vp = dvp = lvp = NULLVP;
3558
3559         /* look up the object we are linking to */
3560         NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
3561                 UIO_USERSPACE, uap->path, ctx);
3562         error = namei(&nd);
3563         if (error)
3564                 return (error);
3565         vp = nd.ni_vp;
3566
3567         nameidone(&nd);
3568
3569         /*
3570          * Normally, linking to directories is not supported.
3571          * However, some file systems may have limited support.
3572          */
3573         if (vp->v_type == VDIR) {
3574                 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
3575                         error = EPERM;   /* POSIX */
3576                         goto out;
3577                 }
3578                 /* Linking to a directory requires ownership. */
3579                 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
3580                         struct vnode_attr dva;
3581
3582                         VATTR_INIT(&dva);
3583                         VATTR_WANTED(&dva, va_uid);
3584                         if (vnode_getattr(vp, &dva, ctx) != 0 ||
3585                             !VATTR_IS_SUPPORTED(&dva, va_uid) ||
3586                             (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
3587                                 error = EACCES;
3588                                 goto out;
3589                         }
3590                 }
3591         }
3592
3593         /* lookup the target node */
3594 #if CONFIG_TRIGGERS
3595         nd.ni_op = OP_LINK;
3596 #endif
3597         nd.ni_cnd.cn_nameiop = CREATE;
3598         nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
3599         nd.ni_dirp = uap->link;
3600         error = namei(&nd);
3601         if (error != 0)
3602                 goto out;
3603         dvp = nd.ni_dvp;
3604         lvp = nd.ni_vp;
3605
3606 #if CONFIG_MACF
3607         if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
3608                 goto out2;
3609 #endif
3610
3611         /* or to anything that kauth doesn't want us to (eg. immutable items) */
3612         if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
3613                 goto out2;
3614
3615         /* target node must not exist */
3616         if (lvp != NULLVP) {
3617                 error = EEXIST;
3618                 goto out2;
3619         }
3620         /* cannot link across mountpoints */
3621         if (vnode_mount(vp) != vnode_mount(dvp)) {
3622                 error = EXDEV;
3623                 goto out2;
3624         }
3625
3626         /* authorize creation of the target note */
3627         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3628                 goto out2;
3629
3630         /* and finally make the link */
3631         error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
3632         if (error)
3633                 goto out2;
3634
3635 #if CONFIG_FSE
3636         need_event = need_fsevent(FSE_CREATE_FILE, dvp);
3637 #else
3638         need_event = 0;
3639 #endif
3640         has_listeners = kauth_authorize_fileop_has_listeners();
3641
3642         if (need_event || has_listeners) {
3643                 char *link_to_path = NULL;
3644                 int len, link_name_len;
3645
3646                 /* build the path to the new link file */
3647                 GET_PATH(target_path);
3648                 if (target_path == NULL) {
3649                         error = ENOMEM;
3650                         goto out2;
3651                 }
3652
3653                 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
3654
3655                 if (has_listeners) {
3656                         /* build the path to file we are linking to */
3657                         GET_PATH(link_to_path);
3658                         if (link_to_path == NULL) {
3659                                 error = ENOMEM;
3660                                 goto out2;
3661                         }
3662
3663                         link_name_len = MAXPATHLEN;
3664                         vn_getpath(vp, link_to_path, &link_name_len);
3665
3666                         /*
3667                          * Call out to allow 3rd party notification of rename.
3668                          * Ignore result of kauth_authorize_fileop call.
3669                          */
3670                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
3671                                                (uintptr_t)link_to_path, (uintptr_t)target_path);
3672                         if (link_to_path != NULL) {
3673                                 RELEASE_PATH(link_to_path);
3674                         }
3675                 }
3676 #if CONFIG_FSE
3677                 if (need_event) {
3678                         /* construct fsevent */
3679                         if (get_fse_info(vp, &finfo, ctx) == 0) {
3680                                 if (truncated) {
3681                                         finfo.mode |= FSE_TRUNCATED_PATH;
3682                                 }
3683
3684                                 // build the path to the destination of the link
3685                                 add_fsevent(FSE_CREATE_FILE, ctx,
3686                                             FSE_ARG_STRING, len, target_path,
3687                                             FSE_ARG_FINFO, &finfo,
3688                                             FSE_ARG_DONE);
3689                         }
3690                         if (vp->v_parent) {
3691                             add_fsevent(FSE_STAT_CHANGED, ctx,
3692                                 FSE_ARG_VNODE, vp->v_parent,
3693                                 FSE_ARG_DONE);
3694                         }
3695                 }
3696 #endif
3697         }
3698 out2:
3699         /*
3700          * nameidone has to happen before we vnode_put(dvp)
3701          * since it may need to release the fs_nodelock on the dvp
3702          */
3703         nameidone(&nd);
3704         if (target_path != NULL) {
3705                 RELEASE_PATH(target_path);
3706         }
3707 out:
3708         if (lvp)
3709                 vnode_put(lvp);
3710         if (dvp)
3711                 vnode_put(dvp);
3712         vnode_put(vp);
3713         return (error);
3714 }
3715
3716 /*
3717  * Make a symbolic link.
3718  *
3719  * We could add support for ACLs here too...
3720  */
3721 /* ARGSUSED */
3722 int
3723 symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
3724 {
3725         struct vnode_attr va;
3726         char *path;
3727         int error;
3728         struct nameidata nd;
3729         vfs_context_t ctx = vfs_context_current();
3730         vnode_t vp, dvp;
3731         size_t dummy=0;
3732
3733         MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
3734         error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
3735         if (error)
3736                 goto out;
3737         AUDIT_ARG(text, path);  /* This is the link string */
3738
3739         NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
3740                 UIO_USERSPACE, uap->link, ctx);
3741         error = namei(&nd);
3742         if (error)
3743                 goto out;
3744         dvp = nd.ni_dvp;
3745         vp = nd.ni_vp;
3746
3747         VATTR_INIT(&va);
3748         VATTR_SET(&va, va_type, VLNK);
3749         VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
3750 #if CONFIG_MACF
3751         error = mac_vnode_check_create(ctx,
3752                         dvp, &nd.ni_cnd, &va);
3753 #endif
3754         if (error != 0) {
3755             goto skipit;
3756         }
3757
3758         if (vp != NULL) {
3759             error = EEXIST;
3760             goto skipit;
3761         }
3762
3763         /* authorize */
3764         if (error == 0)
3765                 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
3766         /* get default ownership, etc. */
3767         if (error == 0)
3768                 error = vnode_authattr_new(dvp, &va, 0, ctx);
3769         if (error == 0)
3770                 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
3771
3772         /* do fallback attribute handling */
3773         if (error == 0)
3774                 error = vnode_setattr_fallback(vp, &va, ctx);
3775
3776         if (error == 0) {
3777                 int     update_flags = 0;
3778
3779                 if (vp == NULL) {
3780                         nd.ni_cnd.cn_nameiop = LOOKUP;
3781 #if CONFIG_TRIGGERS
3782                         nd.ni_op = OP_LOOKUP;
3783 #endif
3784                         nd.ni_cnd.cn_flags = 0;
3785                         error = namei(&nd);
3786                         vp = nd.ni_vp;
3787
3788                         if (vp == NULL)
3789                                 goto skipit;
3790                 }
3791
3792 #if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
3793                 /* call out to allow 3rd party notification of rename.
3794                  * Ignore result of kauth_authorize_fileop call.
3795                  */
3796                 if (kauth_authorize_fileop_has_listeners() &&
3797                     namei(&nd) == 0) {
3798                         char *new_link_path = NULL;
3799                         int             len;
3800
3801                         /* build the path to the new link file */
3802                         new_link_path = get_pathbuff();
3803                         len = MAXPATHLEN;
3804                         vn_getpath(dvp, new_link_path, &len);
3805                         if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
3806                                 new_link_path[len - 1] = '/';
3807                                 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
3808                         }
3809
3810                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
3811                                            (uintptr_t)path, (uintptr_t)new_link_path);
3812                         if (new_link_path != NULL)
3813                                 release_pathbuff(new_link_path);
3814                 }
3815 #endif
3816                 // Make sure the name & parent pointers are hooked up
3817                 if (vp->v_name == NULL)
3818                         update_flags |= VNODE_UPDATE_NAME;
3819                 if (vp->v_parent == NULLVP)
3820                         update_flags |= VNODE_UPDATE_PARENT;
3821
3822                 if (update_flags)
3823                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3824
3825 #if CONFIG_FSE
3826                 add_fsevent(FSE_CREATE_FILE, ctx,
3827                             FSE_ARG_VNODE, vp,
3828                             FSE_ARG_DONE);
3829 #endif
3830         }
3831
3832 skipit:
3833         /*
3834          * nameidone has to happen before we vnode_put(dvp)
3835          * since it may need to release the fs_nodelock on the dvp
3836          */
3837         nameidone(&nd);
3838
3839         if (vp)
3840                 vnode_put(vp);
3841         vnode_put(dvp);
3842 out:
3843         FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
3844
3845         return (error);
3846 }
3847
3848 /*
3849  * Delete a whiteout from the filesystem.
3850  * XXX authorization not implmented for whiteouts
3851  */
3852 int
3853 undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
3854 {
3855         int error;
3856         struct nameidata nd;
3857         vfs_context_t ctx = vfs_context_current();
3858         vnode_t vp, dvp;
3859
3860         NDINIT(&nd, DELETE, OP_UNLINK, LOCKPARENT | DOWHITEOUT | AUDITVNPATH1,
3861                 UIO_USERSPACE, uap->path, ctx);
3862         error = namei(&nd);
3863         if (error)
3864                 return (error);
3865         dvp = nd.ni_dvp;
3866         vp = nd.ni_vp;
3867
3868         if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
3869                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
3870         } else
3871                 error = EEXIST;
3872
3873         /*
3874          * nameidone has to happen before we vnode_put(dvp)
3875          * since it may need to release the fs_nodelock on the dvp
3876          */
3877         nameidone(&nd);
3878
3879         if (vp)
3880                 vnode_put(vp);
3881         vnode_put(dvp);
3882
3883         return (error);
3884 }
3885
3886
3887 /*
3888  * Delete a name from the filesystem.
3889  */
3890 /* ARGSUSED */
3891 int
3892 unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
3893 {
3894         vnode_t vp, dvp;
3895         int error;
3896         struct componentname *cnp;
3897         char  *path = NULL;
3898         int  len=0;
3899 #if CONFIG_FSE
3900         fse_info  finfo;
3901         struct vnode_attr va;
3902 #endif
3903         int flags = 0;
3904         int need_event = 0;
3905         int has_listeners = 0;
3906         int truncated_path=0;
3907         int batched;
3908         struct vnode_attr *vap = NULL;
3909
3910 #if NAMEDRSRCFORK
3911         /* unlink or delete is allowed on rsrc forks and named streams */
3912         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3913 #endif
3914
3915         ndp->ni_cnd.cn_flags |= LOCKPARENT;
3916         ndp->ni_flag |= NAMEI_COMPOUNDREMOVE;
3917         cnp = &ndp->ni_cnd;
3918
3919 lookup_continue:
3920         error = namei(ndp);
3921         if (error)
3922                 return (error);
3923
3924         dvp = ndp->ni_dvp;
3925         vp = ndp->ni_vp;
3926
3927
3928         /* With Carbon delete semantics, busy files cannot be deleted */
3929         if (nodelbusy) {
3930                 flags |= VNODE_REMOVE_NODELETEBUSY;
3931         }
3932
3933         if (vp) {
3934                 batched = vnode_compound_remove_available(vp);
3935                 /*
3936                  * The root of a mounted filesystem cannot be deleted.
3937                  */
3938                 if (vp->v_flag & VROOT) {
3939                         error = EBUSY;
3940                 }
3941
3942                 if (!batched) {
3943                         error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
3944                         if (error) {
3945                                 goto out;
3946                         }
3947                 }
3948         } else {
3949                 batched = 1;
3950
3951                 if (!vnode_compound_remove_available(dvp)) {
3952                         panic("No vp, but no compound remove?");
3953                 }
3954         }
3955
3956 #if CONFIG_FSE
3957         need_event = need_fsevent(FSE_DELETE, dvp);
3958         if (need_event) {
3959                 if (!batched) {
3960                         if ((vp->v_flag & VISHARDLINK) == 0) {
3961                                 /* XXX need to get these data in batched VNOP */
3962                                 get_fse_info(vp, &finfo, ctx);
3963                         }
3964                 } else {
3965                         error = vfs_get_notify_attributes(&va);
3966                         if (error) {
3967                                 goto out;
3968                         }
3969
3970                         vap = &va;
3971                 }
3972         }
3973 #endif
3974         has_listeners = kauth_authorize_fileop_has_listeners();
3975         if (need_event || has_listeners) {
3976                 if (path == NULL) {
3977                         GET_PATH(path);
3978                         if (path == NULL) {
3979                                 error = ENOMEM;
3980                                 goto out;
3981                         }
3982                 }
3983                 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
3984         }
3985
3986 #if NAMEDRSRCFORK
3987         if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
3988                 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
3989         else
3990 #endif
3991         {
3992                 error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx);
3993                 vp = ndp->ni_vp;
3994                 if (error == EKEEPLOOKING) {
3995                         if (!batched) {
3996                                 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
3997                         }
3998
3999                         if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
4000                                 panic("EKEEPLOOKING, but continue flag not set?");
4001                         }
4002
4003                         if (vnode_isdir(vp)) {
4004                                 error = EISDIR;
4005                                 goto out;
4006                         }
4007                         goto lookup_continue;
4008                 }
4009         }
4010
4011         /*
4012          * Call out to allow 3rd party notification of delete.
4013          * Ignore result of kauth_authorize_fileop call.
4014          */
4015         if (!error) {
4016                 if (has_listeners) {
4017                         kauth_authorize_fileop(vfs_context_ucred(ctx),
4018                                 KAUTH_FILEOP_DELETE,
4019                                 (uintptr_t)vp,
4020                                 (uintptr_t)path);
4021                 }
4022
4023                 if (vp->v_flag & VISHARDLINK) {
4024                     //
4025                     // if a hardlink gets deleted we want to blow away the
4026                     // v_parent link because the path that got us to this
4027                     // instance of the link is no longer valid.  this will
4028                     // force the next call to get the path to ask the file
4029                     // system instead of just following the v_parent link.
4030                     //
4031                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4032                 }
4033
4034 #if CONFIG_FSE
4035                 if (need_event) {
4036                         if (vp->v_flag & VISHARDLINK) {
4037                                 get_fse_info(vp, &finfo, ctx);
4038                         } else if (vap) {
4039                                 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4040                         }
4041                         if (truncated_path) {
4042                                 finfo.mode |= FSE_TRUNCATED_PATH;
4043                         }
4044                         add_fsevent(FSE_DELETE, ctx,
4045                                                 FSE_ARG_STRING, len, path,
4046                                                 FSE_ARG_FINFO, &finfo,
4047                                                 FSE_ARG_DONE);
4048                 }
4049 #endif
4050         }
4051
4052 out:
4053         if (path != NULL)
4054                 RELEASE_PATH(path);
4055
4056 #if NAMEDRSRCFORK
4057         /* recycle the deleted rsrc fork vnode to force a reclaim, which
4058          * will cause its shadow file to go away if necessary.
4059          */
4060          if (vp && (vnode_isnamedstream(vp)) &&
4061                 (vp->v_parent != NULLVP) &&
4062                 vnode_isshadow(vp)) {
4063                         vnode_recycle(vp);
4064          }
4065 #endif
4066         /*
4067          * nameidone has to happen before we vnode_put(dvp)
4068          * since it may need to release the fs_nodelock on the dvp
4069          */
4070         nameidone(ndp);
4071         vnode_put(dvp);
4072         if (vp) {
4073                 vnode_put(vp);
4074         }
4075         return (error);
4076 }
4077
4078 /*
4079  * Delete a name from the filesystem using POSIX semantics.
4080  */
4081 int
4082 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4083 {
4084         struct nameidata nd;
4085         vfs_context_t ctx = vfs_context_current();
4086
4087         NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4088                uap->path, ctx);
4089         return unlink1(ctx, &nd, 0);
4090 }
4091
4092 /*
4093  * Delete a name from the filesystem using Carbon semantics.
4094  */
4095 int
4096 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4097 {
4098         struct nameidata nd;
4099         vfs_context_t ctx = vfs_context_current();
4100
4101         NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4102                uap->path, ctx);
4103         return unlink1(ctx, &nd, 1);
4104 }
4105
4106 /*
4107  * Reposition read/write file offset.
4108  */
4109 int
4110 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
4111 {
4112         struct fileproc *fp;
4113         vnode_t vp;
4114         struct vfs_context *ctx;
4115         off_t offset = uap->offset, file_size;
4116         int error;
4117
4118         if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4119                 if (error == ENOTSUP)
4120                         return (ESPIPE);
4121                 return (error);
4122         }
4123         if (vnode_isfifo(vp)) {
4124                 file_drop(uap->fd);
4125                 return(ESPIPE);
4126         }
4127
4128
4129         ctx = vfs_context_current();
4130 #if CONFIG_MACF
4131         if (uap->whence == L_INCR && uap->offset == 0)
4132                 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4133                     fp->f_fglob);
4134         else
4135                 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4136                     fp->f_fglob);
4137         if (error) {
4138                 file_drop(uap->fd);
4139                 return (error);
4140         }
4141 #endif
4142         if ( (error = vnode_getwithref(vp)) ) {
4143                 file_drop(uap->fd);
4144                 return(error);
4145         }
4146
4147         switch (uap->whence) {
4148         case L_INCR:
4149                 offset += fp->f_fglob->fg_offset;
4150                 break;
4151         case L_XTND:
4152                 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
4153                         break;
4154                 offset += file_size;
4155                 break;
4156         case L_SET:
4157                 break;
4158         default:
4159                 error = EINVAL;
4160         }
4161         if (error == 0) {
4162                 if (uap->offset > 0 && offset < 0) {
4163                         /* Incremented/relative move past max size */
4164                         error = EOVERFLOW;
4165                 } else {
4166                         /*
4167                          * Allow negative offsets on character devices, per
4168                          * POSIX 1003.1-2001.  Most likely for writing disk
4169                          * labels.
4170                          */
4171                         if (offset < 0 && vp->v_type != VCHR) {
4172                                 /* Decremented/relative move before start */
4173                                 error = EINVAL;
4174                         } else {
4175                                 /* Success */
4176                                 fp->f_fglob->fg_offset = offset;
4177                                 *retval = fp->f_fglob->fg_offset;
4178                         }
4179                 }
4180         }
4181
4182         /*
4183          * An lseek can affect whether data is "available to read."  Use
4184          * hint of NOTE_NONE so no EVFILT_VNODE events fire
4185          */
4186         post_event_if_success(vp, error, NOTE_NONE);
4187         (void)vnode_put(vp);
4188         file_drop(uap->fd);
4189         return (error);
4190 }
4191
4192
4193 /*
4194  * Check access permissions.
4195  *
4196  * Returns:     0                       Success
4197  *              vnode_authorize:???
4198  */
4199 static int
4200 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4201 {
4202         kauth_action_t action;
4203         int error;
4204
4205         /*
4206          * If just the regular access bits, convert them to something
4207          * that vnode_authorize will understand.
4208          */
4209         if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4210                 action = 0;
4211                 if (uflags & R_OK)
4212                         action |= KAUTH_VNODE_READ_DATA;        /* aka KAUTH_VNODE_LIST_DIRECTORY */
4213                 if (uflags & W_OK) {
4214                         if (vnode_isdir(vp)) {
4215                                 action |= KAUTH_VNODE_ADD_FILE |
4216                                     KAUTH_VNODE_ADD_SUBDIRECTORY;
4217                                 /* might want delete rights here too */
4218                         } else {
4219                                 action |= KAUTH_VNODE_WRITE_DATA;
4220                         }
4221                 }
4222                 if (uflags & X_OK) {
4223                         if (vnode_isdir(vp)) {
4224                                 action |= KAUTH_VNODE_SEARCH;
4225                         } else {
4226                                 action |= KAUTH_VNODE_EXECUTE;
4227                         }
4228                 }
4229         } else {
4230                 /* take advantage of definition of uflags */
4231                 action = uflags >> 8;
4232         }
4233
4234 #if CONFIG_MACF
4235         error = mac_vnode_check_access(ctx, vp, uflags);
4236         if (error)
4237                 return (error);
4238 #endif /* MAC */
4239
4240         /* action == 0 means only check for existence */
4241         if (action != 0) {
4242                 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4243         } else {
4244                 error = 0;
4245         }
4246
4247         return(error);
4248 }
4249
4250
4251
4252 /*
4253  * access_extended: Check access permissions in bulk.
4254  *
4255  * Description: uap->entries            Pointer to an array of accessx
4256  *                                      descriptor structs, plus one or
4257  *                                      more NULL terminated strings (see
4258  *                                      "Notes" section below).
4259  *              uap->size               Size of the area pointed to by
4260  *                                      uap->entries.
4261  *              uap->results            Pointer to the results array.
4262  *
4263  * Returns:     0                       Success
4264  *              ENOMEM                  Insufficient memory
4265  *              EINVAL                  Invalid arguments
4266  *              namei:EFAULT            Bad address
4267  *              namei:ENAMETOOLONG      Filename too long
4268  *              namei:ENOENT            No such file or directory
4269  *              namei:ELOOP             Too many levels of symbolic links
4270  *              namei:EBADF             Bad file descriptor
4271  *              namei:ENOTDIR           Not a directory
4272  *              namei:???
4273  *              access1:
4274  *
4275  * Implicit returns:
4276  *              uap->results            Array contents modified
4277  *
4278  * Notes:       The uap->entries are structured as an arbitrary length array
4279  *              of accessx descriptors, followed by one or more NULL terminated
4280  *              strings
4281  *
4282  *                      struct accessx_descriptor[0]
4283  *                      ...
4284  *                      struct accessx_descriptor[n]
4285  *                      char name_data[0];
4286  *
4287  *              We determine the entry count by walking the buffer containing
4288  *              the uap->entries argument descriptor.  For each descriptor we
4289  *              see, the valid values for the offset ad_name_offset will be
4290  *              in the byte range:
4291  *
4292  *                      [ uap->entries + sizeof(struct accessx_descriptor) ]
4293  *                                              to
4294  *                              [ uap->entries + uap->size - 2 ]
4295  *
4296  *              since we must have at least one string, and the string must
4297  *              be at least one character plus the NULL terminator in length.
4298  *
4299  * XXX:         Need to support the check-as uid argument
4300  */
4301 int
4302 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
4303 {
4304         struct accessx_descriptor *input = NULL;
4305         errno_t *result = NULL;
4306         errno_t error = 0;
4307         int wantdelete = 0;
4308         unsigned int desc_max, desc_actual, i, j;
4309         struct vfs_context context;
4310         struct nameidata nd;
4311         int niopts;
4312         vnode_t vp = NULL;
4313         vnode_t dvp = NULL;
4314 #define ACCESSX_MAX_DESCR_ON_STACK 10
4315         struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
4316
4317         context.vc_ucred = NULL;
4318
4319         /*
4320          * Validate parameters; if valid, copy the descriptor array and string
4321          * arguments into local memory.  Before proceeding, the following
4322          * conditions must have been met:
4323          *
4324          * o    The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4325          * o    There must be sufficient room in the request for at least one
4326          *      descriptor and a one yte NUL terminated string.
4327          * o    The allocation of local storage must not fail.
4328          */
4329         if (uap->size > ACCESSX_MAX_TABLESIZE)
4330                 return(ENOMEM);
4331         if (uap->size < (sizeof(struct accessx_descriptor) + 2))
4332                 return(EINVAL);
4333         if (uap->size <= sizeof (stack_input)) {
4334                 input = stack_input;
4335         } else {
4336         MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
4337         if (input == NULL) {
4338                 error = ENOMEM;
4339                 goto out;
4340         }
4341         }
4342         error = copyin(uap->entries, input, uap->size);
4343         if (error)
4344                 goto out;
4345
4346         AUDIT_ARG(opaque, input, uap->size);
4347
4348         /*
4349          * Force NUL termination of the copyin buffer to avoid nami() running
4350          * off the end.  If the caller passes us bogus data, they may get a
4351          * bogus result.
4352          */
4353         ((char *)input)[uap->size - 1] = 0;
4354
4355         /*
4356          * Access is defined as checking against the process' real identity,
4357          * even if operations are checking the effective identity.  This
4358          * requires that we use a local vfs context.
4359          */
4360         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4361         context.vc_thread = current_thread();
4362
4363         /*
4364          * Find out how many entries we have, so we can allocate the result
4365          * array by walking the list and adjusting the count downward by the
4366          * earliest string offset we see.
4367          */
4368         desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
4369         desc_actual = desc_max;
4370         for (i = 0; i < desc_actual; i++) {
4371                 /*
4372                  * Take the offset to the name string for this entry and
4373                  * convert to an input array index, which would be one off
4374                  * the end of the array if this entry was the lowest-addressed
4375                  * name string.
4376                  */
4377                 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
4378
4379                 /*
4380                  * An offset greater than the max allowable offset is an error.
4381                  * It is also an error for any valid entry to point
4382                  * to a location prior to the end of the current entry, if
4383                  * it's not a reference to the string of the previous entry.
4384                  */
4385                 if (j > desc_max || (j != 0 && j <= i)) {
4386                         error = EINVAL;
4387                         goto out;
4388                 }
4389
4390                 /*
4391                  * An offset of 0 means use the previous descriptor's offset;
4392                  * this is used to chain multiple requests for the same file
4393                  * to avoid multiple lookups.
4394                  */
4395                 if (j == 0) {
4396                         /* This is not valid for the first entry */
4397                         if (i == 0) {
4398                                 error = EINVAL;
4399                                 goto out;
4400                         }
4401                         continue;
4402                 }
4403
4404                 /*
4405                  * If the offset of the string for this descriptor is before
4406                  * what we believe is the current actual last descriptor,
4407                  * then we need to adjust our estimate downward; this permits
4408                  * the string table following the last descriptor to be out
4409                  * of order relative to the descriptor list.
4410                  */
4411                 if (j < desc_actual)
4412                         desc_actual = j;
4413         }
4414
4415         /*
4416          * We limit the actual number of descriptors we are willing to process
4417          * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
4418          * requested does not exceed this limit,
4419          */
4420         if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
4421                 error = ENOMEM;
4422                 goto out;
4423         }
4424         MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
4425         if (result == NULL) {
4426                 error = ENOMEM;
4427                 goto out;
4428         }
4429
4430         /*
4431          * Do the work by iterating over the descriptor entries we know to
4432          * at least appear to contain valid data.
4433          */
4434         error = 0;
4435         for (i = 0; i < desc_actual; i++) {
4436                 /*
4437                  * If the ad_name_offset is 0, then we use the previous
4438                  * results to make the check; otherwise, we are looking up
4439                  * a new file name.
4440                  */
4441                 if (input[i].ad_name_offset != 0) {
4442                         /* discard old vnodes */
4443                         if (vp) {
4444                                 vnode_put(vp);
4445                                 vp = NULL;
4446                         }
4447                         if (dvp) {
4448                                 vnode_put(dvp);
4449                                 dvp = NULL;
4450                         }
4451
4452                         /*
4453                          * Scan forward in the descriptor list to see if we
4454                          * need the parent vnode.  We will need it if we are
4455                          * deleting, since we must have rights  to remove
4456                          * entries in the parent directory, as well as the
4457                          * rights to delete the object itself.
4458                          */
4459                         wantdelete = input[i].ad_flags & _DELETE_OK;
4460                         for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
4461                                 if (input[j].ad_flags & _DELETE_OK)
4462                                         wantdelete = 1;
4463
4464                         niopts = FOLLOW | AUDITVNPATH1;
4465
4466                         /* need parent for vnode_authorize for deletion test */
4467                         if (wantdelete)
4468                                 niopts |= WANTPARENT;
4469
4470                         /* do the lookup */
4471                         NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
4472                                CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
4473                                &context);
4474                         error = namei(&nd);
4475                         if (!error) {
4476                                 vp = nd.ni_vp;
4477                                 if (wantdelete)
4478                                         dvp = nd.ni_dvp;
4479                         }
4480                         nameidone(&nd);
4481                 }
4482
4483                 /*
4484                  * Handle lookup errors.
4485                  */
4486                 switch(error) {
4487                 case ENOENT:
4488                 case EACCES:
4489                 case EPERM:
4490                 case ENOTDIR:
4491                         result[i] = error;
4492                         break;
4493                 case 0:
4494                         /* run this access check */
4495                         result[i] = access1(vp, dvp, input[i].ad_flags, &context);
4496                         break;
4497                 default:
4498                         /* fatal lookup error */
4499
4500                         goto out;
4501                 }
4502         }
4503
4504         AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
4505
4506         /* copy out results */
4507         error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
4508
4509 out:
4510         if (input && input != stack_input)
4511                 FREE(input, M_TEMP);
4512         if (result)
4513                 FREE(result, M_TEMP);
4514         if (vp)
4515                 vnode_put(vp);
4516         if (dvp)
4517                 vnode_put(dvp);
4518         if (IS_VALID_CRED(context.vc_ucred))
4519                 kauth_cred_unref(&context.vc_ucred);
4520         return(error);
4521 }
4522
4523
4524 /*
4525  * Returns:     0                       Success
4526  *              namei:EFAULT            Bad address
4527  *              namei:ENAMETOOLONG      Filename too long
4528  *              namei:ENOENT            No such file or directory
4529  *              namei:ELOOP             Too many levels of symbolic links
4530  *              namei:EBADF             Bad file descriptor
4531  *              namei:ENOTDIR           Not a directory
4532  *              namei:???
4533  *              access1:
4534  */
4535 int
4536 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
4537 {
4538         int error;
4539         struct nameidata nd;
4540         int niopts;
4541         struct vfs_context context;
4542 #if NAMEDRSRCFORK
4543         int is_namedstream = 0;
4544 #endif
4545
4546         /*
4547          * Access is defined as checking against the process'
4548          * real identity, even if operations are checking the
4549          * effective identity.  So we need to tweak the credential
4550          * in the context.
4551          */
4552         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4553         context.vc_thread = current_thread();
4554
4555         niopts = FOLLOW | AUDITVNPATH1;
4556         /* need parent for vnode_authorize for deletion test */
4557         if (uap->flags & _DELETE_OK)
4558                 niopts |= WANTPARENT;
4559         NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_USERSPACE,
4560                uap->path, &context);
4561
4562 #if NAMEDRSRCFORK
4563         /* access(F_OK) calls are allowed for resource forks. */
4564         if (uap->flags == F_OK)
4565                 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4566 #endif
4567         error = namei(&nd);
4568         if (error)
4569                 goto out;
4570
4571 #if NAMEDRSRCFORK
4572         /* Grab reference on the shadow stream file vnode to
4573          * force an inactive on release which will mark it
4574          * for recycle.
4575          */
4576         if (vnode_isnamedstream(nd.ni_vp) &&
4577             (nd.ni_vp->v_parent != NULLVP) &&
4578             vnode_isshadow(nd.ni_vp)) {
4579                 is_namedstream = 1;
4580                 vnode_ref(nd.ni_vp);
4581         }
4582 #endif
4583
4584         error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
4585
4586 #if NAMEDRSRCFORK
4587         if (is_namedstream) {
4588                 vnode_rele(nd.ni_vp);
4589         }
4590 #endif
4591
4592         vnode_put(nd.ni_vp);
4593         if (uap->flags & _DELETE_OK)
4594                 vnode_put(nd.ni_dvp);
4595         nameidone(&nd);
4596
4597 out:
4598         kauth_cred_unref(&context.vc_ucred);
4599         return(error);
4600 }
4601
4602
4603 /*
4604  * Returns:     0                       Success
4605  *              EFAULT
4606  *      copyout:EFAULT
4607  *      namei:???
4608  *      vn_stat:???
4609  */
4610 static int
4611 stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4612 {
4613         union {
4614                 struct stat sb;
4615                 struct stat64 sb64;
4616         } source;
4617         union {
4618                 struct user64_stat user64_sb;
4619                 struct user32_stat user32_sb;
4620                 struct user64_stat64 user64_sb64;
4621                 struct user32_stat64 user32_sb64;
4622         } dest;
4623         caddr_t sbp;
4624         int error, my_size;
4625         kauth_filesec_t fsec;
4626         size_t xsecurity_bufsize;
4627         void * statptr;
4628
4629 #if NAMEDRSRCFORK
4630         int is_namedstream = 0;
4631         /* stat calls are allowed for resource forks. */
4632         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4633 #endif
4634         error = namei(ndp);
4635         if (error)
4636                 return (error);
4637         fsec = KAUTH_FILESEC_NONE;
4638
4639         statptr = (void *)&source;
4640
4641 #if NAMEDRSRCFORK
4642         /* Grab reference on the shadow stream file vnode to
4643          * force an inactive on release which will mark it
4644          * for recycle.
4645          */
4646         if (vnode_isnamedstream(ndp->ni_vp) &&
4647             (ndp->ni_vp->v_parent != NULLVP) &&
4648             vnode_isshadow(ndp->ni_vp)) {
4649                 is_namedstream = 1;
4650                 vnode_ref(ndp->ni_vp);
4651         }
4652 #endif
4653
4654         error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
4655
4656 #if NAMEDRSRCFORK
4657         if (is_namedstream) {
4658                 vnode_rele(ndp->ni_vp);
4659         }
4660 #endif
4661         vnode_put(ndp->ni_vp);
4662         nameidone(ndp);
4663
4664         if (error)
4665                 return (error);
4666         /* Zap spare fields */
4667         if (isstat64 != 0) {
4668                 source.sb64.st_lspare = 0;
4669                 source.sb64.st_qspare[0] = 0LL;
4670                 source.sb64.st_qspare[1] = 0LL;
4671                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4672                         munge_user64_stat64(&source.sb64, &dest.user64_sb64);
4673                         my_size = sizeof(dest.user64_sb64);
4674                         sbp = (caddr_t)&dest.user64_sb64;
4675                 } else {
4676                         munge_user32_stat64(&source.sb64, &dest.user32_sb64);
4677                         my_size = sizeof(dest.user32_sb64);
4678                         sbp = (caddr_t)&dest.user32_sb64;
4679                 }
4680                 /*
4681                  * Check if we raced (post lookup) against the last unlink of a file.
4682                  */
4683                 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
4684                         source.sb64.st_nlink = 1;
4685                 }
4686         } else {
4687                 source.sb.st_lspare = 0;
4688                 source.sb.st_qspare[0] = 0LL;
4689                 source.sb.st_qspare[1] = 0LL;
4690                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4691                         munge_user64_stat(&source.sb, &dest.user64_sb);
4692                         my_size = sizeof(dest.user64_sb);
4693                         sbp = (caddr_t)&dest.user64_sb;
4694                 } else {
4695                         munge_user32_stat(&source.sb, &dest.user32_sb);
4696                         my_size = sizeof(dest.user32_sb);
4697                         sbp = (caddr_t)&dest.user32_sb;
4698                 }
4699
4700                 /*
4701                  * Check if we raced (post lookup) against the last unlink of a file.
4702                  */
4703                 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
4704                         source.sb.st_nlink = 1;
4705                 }
4706         }
4707         if ((error = copyout(sbp, ub, my_size)) != 0)
4708                 goto out;
4709
4710         /* caller wants extended security information? */
4711         if (xsecurity != USER_ADDR_NULL) {
4712
4713                 /* did we get any? */
4714                 if (fsec == KAUTH_FILESEC_NONE) {
4715                         if (susize(xsecurity_size, 0) != 0) {
4716                                 error = EFAULT;
4717                                 goto out;
4718                         }
4719                 } else {
4720                         /* find the user buffer size */
4721                         xsecurity_bufsize = fusize(xsecurity_size);
4722
4723                         /* copy out the actual data size */
4724                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
4725                                 error = EFAULT;
4726                                 goto out;
4727                         }
4728
4729                         /* if the caller supplied enough room, copy out to it */
4730                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
4731                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
4732                 }
4733         }
4734 out:
4735         if (fsec != KAUTH_FILESEC_NONE)
4736                 kauth_filesec_free(fsec);
4737         return (error);
4738 }
4739
4740 /*
4741  * Get file status; this version follows links.
4742  *
4743  * Returns:     0                       Success
4744  *      stat2:???                       [see stat2() in this file]
4745  */
4746 static int
4747 stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4748 {
4749         struct nameidata nd;
4750         vfs_context_t ctx = vfs_context_current();
4751
4752         NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | FOLLOW | AUDITVNPATH1,
4753             UIO_USERSPACE, path, ctx);
4754         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4755 }
4756
4757 /*
4758  * stat_extended: Get file status; with extended security (ACL).
4759  *
4760  * Parameters:    p                       (ignored)
4761  *                uap                     User argument descriptor (see below)
4762  *                retval                  (ignored)
4763  *
4764  * Indirect:      uap->path               Path of file to get status from
4765  *                uap->ub                 User buffer (holds file status info)
4766  *                uap->xsecurity          ACL to get (extended security)
4767  *                uap->xsecurity_size     Size of ACL
4768  *
4769  * Returns:        0                      Success
4770  *                !0                      errno value
4771  *
4772  */
4773 int
4774 stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval)
4775 {
4776         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4777 }
4778
4779 /*
4780  * Returns:     0                       Success
4781  *      stat1:???                       [see stat1() in this file]
4782  */
4783 int
4784 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
4785 {
4786         return(stat1(uap->path, uap->ub, 0, 0, 0));
4787 }
4788
4789 int
4790 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
4791 {
4792         return(stat1(uap->path, uap->ub, 0, 0, 1));
4793 }
4794
4795 /*
4796  * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
4797  *
4798  * Parameters:    p                       (ignored)
4799  *                uap                     User argument descriptor (see below)
4800  *                retval                  (ignored)
4801  *
4802  * Indirect:      uap->path               Path of file to get status from
4803  *                uap->ub                 User buffer (holds file status info)
4804  *                uap->xsecurity          ACL to get (extended security)
4805  *                uap->xsecurity_size     Size of ACL
4806  *
4807  * Returns:        0                      Success
4808  *                !0                      errno value
4809  *
4810  */
4811 int
4812 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
4813 {
4814         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4815 }
4816 /*
4817  * Get file status; this version does not follow links.
4818  */
4819 static int
4820 lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4821 {
4822         struct nameidata nd;
4823         vfs_context_t ctx = vfs_context_current();
4824
4825         NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
4826             UIO_USERSPACE, path, ctx);
4827
4828         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4829 }
4830
4831 /*
4832  * lstat_extended: Get file status; does not follow links; with extended security (ACL).
4833  *
4834  * Parameters:    p                       (ignored)
4835  *                uap                     User argument descriptor (see below)
4836  *                retval                  (ignored)
4837  *
4838  * Indirect:      uap->path               Path of file to get status from
4839  *                uap->ub                 User buffer (holds file status info)
4840  *                uap->xsecurity          ACL to get (extended security)
4841  *                uap->xsecurity_size     Size of ACL
4842  *
4843  * Returns:        0                      Success
4844  *                !0                      errno value
4845  *
4846  */
4847 int
4848 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
4849 {
4850         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4851 }
4852
4853 int
4854 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
4855 {
4856         return(lstat1(uap->path, uap->ub, 0, 0, 0));
4857 }
4858
4859 int
4860 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
4861 {
4862         return(lstat1(uap->path, uap->ub, 0, 0, 1));
4863 }
4864
4865 /*
4866  * lstat64_extended: Get file status; can handle large inode numbers; does not
4867  * follow links; with extended security (ACL).
4868  *
4869  * Parameters:    p                       (ignored)
4870  *                uap                     User argument descriptor (see below)
4871  *                retval                  (ignored)
4872  *
4873  * Indirect:      uap->path               Path of file to get status from
4874  *                uap->ub                 User buffer (holds file status info)
4875  *                uap->xsecurity          ACL to get (extended security)
4876  *                uap->xsecurity_size     Size of ACL
4877  *
4878  * Returns:        0                      Success
4879  *                !0                      errno value
4880  *
4881  */
4882 int
4883 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
4884 {
4885         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4886 }
4887
4888 /*
4889  * Get configurable pathname variables.
4890  *
4891  * Returns:     0                       Success
4892  *      namei:???
4893  *      vn_pathconf:???
4894  *
4895  * Notes:       Global implementation  constants are intended to be
4896  *              implemented in this function directly; all other constants
4897  *              are per-FS implementation, and therefore must be handled in
4898  *              each respective FS, instead.
4899  *
4900  * XXX We implement some things globally right now that should actually be
4901  * XXX per-FS; we will need to deal with this at some point.
4902  */
4903 /* ARGSUSED */
4904 int
4905 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
4906 {
4907         int error;
4908         struct nameidata nd;
4909         vfs_context_t ctx = vfs_context_current();
4910
4911         NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
4912                 UIO_USERSPACE, uap->path, ctx);
4913         error = namei(&nd);
4914         if (error)
4915                 return (error);
4916
4917         error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
4918
4919         vnode_put(nd.ni_vp);
4920         nameidone(&nd);
4921         return (error);
4922 }
4923
4924 /*
4925  * Return target name of a symbolic link.
4926  */
4927 /* ARGSUSED */
4928 int
4929 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
4930 {
4931         vnode_t vp;
4932         uio_t auio;
4933         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
4934         int error;
4935         struct nameidata nd;
4936         vfs_context_t ctx = vfs_context_current();
4937         char uio_buf[ UIO_SIZEOF(1) ];
4938
4939         NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
4940                 UIO_USERSPACE, uap->path, ctx);
4941         error = namei(&nd);
4942         if (error)
4943                 return (error);
4944         vp = nd.ni_vp;
4945
4946         nameidone(&nd);
4947
4948         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
4949                                                                   &uio_buf[0], sizeof(uio_buf));
4950         uio_addiov(auio, uap->buf, uap->count);
4951         if (vp->v_type != VLNK)
4952                 error = EINVAL;
4953         else {
4954 #if CONFIG_MACF
4955                 error = mac_vnode_check_readlink(ctx,
4956                     vp);
4957 #endif
4958                 if (error == 0)
4959                         error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
4960                 if (error == 0)
4961                         error = VNOP_READLINK(vp, auio, ctx);
4962         }
4963         vnode_put(vp);
4964
4965         /* Safe: uio_resid() is bounded above by "count", and "count" is an int  */
4966         *retval = uap->count - (int)uio_resid(auio);
4967         return (error);
4968 }
4969
4970 /*
4971  * Change file flags.
4972  */
4973 static int
4974 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
4975 {
4976         struct vnode_attr va;
4977         kauth_action_t action;
4978         int error;
4979
4980         VATTR_INIT(&va);
4981         VATTR_SET(&va, va_flags, flags);
4982
4983 #if CONFIG_MACF
4984         error = mac_vnode_check_setflags(ctx, vp, flags);
4985         if (error)
4986                 goto out;
4987 #endif
4988
4989         /* request authorisation, disregard immutability */
4990         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4991                 goto out;
4992         /*
4993          * Request that the auth layer disregard those file flags it's allowed to when
4994          * authorizing this operation; we need to do this in order to be able to
4995          * clear immutable flags.
4996          */
4997         if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
4998                 goto out;
4999         error = vnode_setattr(vp, &va, ctx);
5000
5001         if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5002                 error = ENOTSUP;
5003         }
5004 out:
5005         vnode_put(vp);
5006         return(error);
5007 }
5008
5009 /*
5010  * Change flags of a file given a path name.
5011  */
5012 /* ARGSUSED */
5013 int
5014 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5015 {
5016         vnode_t vp;
5017         vfs_context_t ctx = vfs_context_current();
5018         int error;
5019         struct nameidata nd;
5020
5021         AUDIT_ARG(fflags, uap->flags);
5022         NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5023                 UIO_USERSPACE, uap->path, ctx);
5024         error = namei(&nd);
5025         if (error)
5026                 return (error);
5027         vp = nd.ni_vp;
5028         nameidone(&nd);
5029
5030         error = chflags1(vp, uap->flags, ctx);
5031
5032         return(error);
5033 }
5034
5035 /*
5036  * Change flags of a file given a file descriptor.
5037  */
5038 /* ARGSUSED */
5039 int
5040 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5041 {
5042         vnode_t vp;
5043         int error;
5044
5045         AUDIT_ARG(fd, uap->fd);
5046         AUDIT_ARG(fflags, uap->flags);
5047         if ( (error = file_vnode(uap->fd, &vp)) )
5048                 return (error);
5049
5050         if ((error = vnode_getwithref(vp))) {
5051                 file_drop(uap->fd);
5052                 return(error);
5053         }
5054
5055         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5056
5057         error = chflags1(vp, uap->flags, vfs_context_current());
5058
5059         file_drop(uap->fd);
5060         return (error);
5061 }
5062
5063 /*
5064  * Change security information on a filesystem object.
5065  *
5066  * Returns:     0                       Success
5067  *              EPERM                   Operation not permitted
5068  *              vnode_authattr:???      [anything vnode_authattr can return]
5069  *              vnode_authorize:???     [anything vnode_authorize can return]
5070  *              vnode_setattr:???       [anything vnode_setattr can return]
5071  *
5072  * Notes:       If vnode_authattr or vnode_authorize return EACCES, it will be
5073  *              translated to EPERM before being returned.
5074  */
5075 static int
5076 chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5077 {
5078         kauth_action_t action;
5079         int error;
5080
5081         AUDIT_ARG(mode, vap->va_mode);
5082         /* XXX audit new args */
5083
5084 #if NAMEDSTREAMS
5085         /* chmod calls are not allowed for resource forks. */
5086         if (vp->v_flag & VISNAMEDSTREAM) {
5087                 return (EPERM);
5088         }
5089 #endif
5090
5091 #if CONFIG_MACF
5092         error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode);
5093         if (error)
5094                 return (error);
5095 #endif
5096
5097         /* make sure that the caller is allowed to set this security information */
5098         if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5099             ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5100                 if (error == EACCES)
5101                         error = EPERM;
5102                 return(error);
5103         }
5104
5105         error = vnode_setattr(vp, vap, ctx);
5106
5107         return (error);
5108 }
5109
5110
5111 /*
5112  * Change mode of a file given a path name.
5113  *
5114  * Returns:     0                       Success
5115  *              namei:???               [anything namei can return]
5116  *              chmod2:???              [anything chmod2 can return]
5117  */
5118 static int
5119 chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
5120 {
5121         struct nameidata nd;
5122         int error;
5123
5124         NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5125                 UIO_USERSPACE, path, ctx);
5126         if ((error = namei(&nd)))
5127                 return (error);
5128         error = chmod2(ctx, nd.ni_vp, vap);
5129         vnode_put(nd.ni_vp);
5130         nameidone(&nd);
5131         return(error);
5132 }
5133
5134 /*
5135  * chmod_extended: Change the mode of a file given a path name; with extended
5136  * argument list (including extended security (ACL)).
5137  *
5138  * Parameters:  p                       Process requesting the open
5139  *              uap                     User argument descriptor (see below)
5140  *              retval                  (ignored)
5141  *
5142  * Indirect:    uap->path               Path to object (same as 'chmod')
5143  *              uap->uid                UID to set
5144  *              uap->gid                GID to set
5145  *              uap->mode               File mode to set (same as 'chmod')
5146  *              uap->xsecurity          ACL to set (or delete)
5147  *
5148  * Returns:     0                       Success
5149  *              !0                      errno value
5150  *
5151  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
5152  *
5153  * XXX:         We should enummerate the possible errno values here, and where
5154  *              in the code they originated.
5155  */
5156 int
5157 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
5158 {
5159         int error;
5160         struct vnode_attr va;
5161         kauth_filesec_t xsecdst;
5162
5163         AUDIT_ARG(owner, uap->uid, uap->gid);
5164
5165         VATTR_INIT(&va);
5166         if (uap->mode != -1)
5167                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5168         if (uap->uid != KAUTH_UID_NONE)
5169                 VATTR_SET(&va, va_uid, uap->uid);
5170         if (uap->gid != KAUTH_GID_NONE)
5171                 VATTR_SET(&va, va_gid, uap->gid);
5172
5173         xsecdst = NULL;
5174         switch(uap->xsecurity) {
5175                 /* explicit remove request */
5176         case CAST_USER_ADDR_T((void *)1):       /* _FILESEC_REMOVE_ACL */
5177                 VATTR_SET(&va, va_acl, NULL);
5178                 break;
5179                 /* not being set */
5180         case USER_ADDR_NULL:
5181                 break;
5182         default:
5183                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5184                         return(error);
5185                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5186                 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
5187         }
5188
5189         error = chmod1(vfs_context_current(), uap->path, &va);
5190
5191         if (xsecdst != NULL)
5192                 kauth_filesec_free(xsecdst);
5193         return(error);
5194 }
5195
5196 /*
5197  * Returns:     0                       Success
5198  *              chmod1:???              [anything chmod1 can return]
5199  */
5200 int
5201 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5202 {
5203         struct vnode_attr va;
5204
5205         VATTR_INIT(&va);
5206         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5207
5208         return(chmod1(vfs_context_current(), uap->path, &va));
5209 }
5210
5211 /*
5212  * Change mode of a file given a file descriptor.
5213  */
5214 static int
5215 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
5216 {
5217         vnode_t vp;
5218         int error;
5219
5220         AUDIT_ARG(fd, fd);
5221
5222         if ((error = file_vnode(fd, &vp)) != 0)
5223                 return (error);
5224         if ((error = vnode_getwithref(vp)) != 0) {
5225                 file_drop(fd);
5226                 return(error);
5227         }
5228         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5229
5230         error = chmod2(vfs_context_current(), vp, vap);
5231         (void)vnode_put(vp);
5232         file_drop(fd);
5233
5234         return (error);
5235 }
5236
5237 /*
5238  * fchmod_extended: Change mode of a file given a file descriptor; with
5239  * extended argument list (including extended security (ACL)).
5240  *
5241  * Parameters:    p                       Process requesting to change file mode
5242  *                uap                     User argument descriptor (see below)
5243  *                retval                  (ignored)
5244  *
5245  * Indirect:      uap->mode               File mode to set (same as 'chmod')
5246  *                uap->uid                UID to set
5247  *                uap->gid                GID to set
5248  *                uap->xsecurity          ACL to set (or delete)
5249  *                uap->fd                 File descriptor of file to change mode
5250  *
5251  * Returns:        0                      Success
5252  *                !0                      errno value
5253  *
5254  */
5255 int
5256 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
5257 {
5258         int error;
5259         struct vnode_attr va;
5260         kauth_filesec_t xsecdst;
5261
5262         AUDIT_ARG(owner, uap->uid, uap->gid);
5263
5264         VATTR_INIT(&va);
5265         if (uap->mode != -1)
5266                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5267         if (uap->uid != KAUTH_UID_NONE)
5268                 VATTR_SET(&va, va_uid, uap->uid);
5269         if (uap->gid != KAUTH_GID_NONE)
5270                 VATTR_SET(&va, va_gid, uap->gid);
5271
5272         xsecdst = NULL;
5273         switch(uap->xsecurity) {
5274         case USER_ADDR_NULL:
5275                 VATTR_SET(&va, va_acl, NULL);
5276                 break;
5277         case CAST_USER_ADDR_T(-1):
5278                 break;
5279         default:
5280                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5281                         return(error);
5282                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5283         }
5284
5285         error = fchmod1(p, uap->fd, &va);
5286
5287
5288         switch(uap->xsecurity) {
5289         case USER_ADDR_NULL:
5290         case CAST_USER_ADDR_T(-1):
5291                 break;
5292         default:
5293                 if (xsecdst != NULL)
5294                         kauth_filesec_free(xsecdst);
5295         }
5296         return(error);
5297 }
5298
5299 int
5300 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
5301 {
5302         struct vnode_attr va;
5303
5304         VATTR_INIT(&va);
5305         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5306
5307         return(fchmod1(p, uap->fd, &va));
5308 }
5309
5310
5311 /*
5312  * Set ownership given a path name.
5313  */
5314 /* ARGSUSED */
5315 static int
5316 chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow)
5317 {
5318         vnode_t vp;
5319         struct vnode_attr va;
5320         int error;
5321         struct nameidata nd;
5322         kauth_action_t action;
5323
5324         AUDIT_ARG(owner, uap->uid, uap->gid);
5325
5326         NDINIT(&nd, LOOKUP, OP_SETATTR,
5327                 (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
5328                 UIO_USERSPACE, uap->path, ctx);
5329         error = namei(&nd);
5330         if (error)
5331                 return (error);
5332         vp = nd.ni_vp;
5333
5334         nameidone(&nd);
5335
5336         VATTR_INIT(&va);
5337         if (uap->uid != VNOVAL)
5338                 VATTR_SET(&va, va_uid, uap->uid);
5339         if (uap->gid != VNOVAL)
5340                 VATTR_SET(&va, va_gid, uap->gid);
5341
5342 #if CONFIG_MACF
5343         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5344         if (error)
5345                 goto out;
5346 #endif
5347
5348         /* preflight and authorize attribute changes */
5349         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5350                 goto out;
5351         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5352                 goto out;
5353         error = vnode_setattr(vp, &va, ctx);
5354
5355 out:
5356         /*
5357          * EACCES is only allowed from namei(); permissions failure should
5358          * return EPERM, so we need to translate the error code.
5359          */
5360         if (error == EACCES)
5361                 error = EPERM;
5362
5363         vnode_put(vp);
5364         return (error);
5365 }
5366
5367 int
5368 chown(__unused proc_t p, struct chown_args *uap, int32_t *retval)
5369 {
5370         return chown1(vfs_context_current(), uap, retval, 1);
5371 }
5372
5373 int
5374 lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval)
5375 {
5376         /* Argument list identical, but machine generated; cast for chown1() */
5377         return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
5378 }
5379
5380 /*
5381  * Set ownership given a file descriptor.
5382  */
5383 /* ARGSUSED */
5384 int
5385 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
5386 {
5387         struct vnode_attr va;
5388         vfs_context_t ctx = vfs_context_current();
5389         vnode_t vp;
5390         int error;
5391         kauth_action_t action;
5392
5393         AUDIT_ARG(owner, uap->uid, uap->gid);
5394         AUDIT_ARG(fd, uap->fd);
5395
5396         if ( (error = file_vnode(uap->fd, &vp)) )
5397                 return (error);
5398
5399         if ( (error = vnode_getwithref(vp)) ) {
5400                 file_drop(uap->fd);
5401                 return(error);
5402         }
5403         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5404
5405         VATTR_INIT(&va);
5406         if (uap->uid != VNOVAL)
5407                 VATTR_SET(&va, va_uid, uap->uid);
5408         if (uap->gid != VNOVAL)
5409                 VATTR_SET(&va, va_gid, uap->gid);
5410
5411 #if NAMEDSTREAMS
5412         /* chown calls are not allowed for resource forks. */
5413         if (vp->v_flag & VISNAMEDSTREAM) {
5414                 error = EPERM;
5415                 goto out;
5416         }
5417 #endif
5418
5419 #if CONFIG_MACF
5420         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5421         if (error)
5422                 goto out;
5423 #endif
5424
5425         /* preflight and authorize attribute changes */
5426         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5427                 goto out;
5428         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5429                 if (error == EACCES)
5430                         error = EPERM;
5431                 goto out;
5432         }
5433         error = vnode_setattr(vp, &va, ctx);
5434
5435 out:
5436         (void)vnode_put(vp);
5437         file_drop(uap->fd);
5438         return (error);
5439 }
5440
5441 static int
5442 getutimes(user_addr_t usrtvp, struct timespec *tsp)
5443 {
5444         int error;
5445
5446         if (usrtvp == USER_ADDR_NULL) {
5447                 struct timeval old_tv;
5448                 /* XXX Y2038 bug because of microtime argument */
5449                 microtime(&old_tv);
5450                 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
5451                 tsp[1] = tsp[0];
5452         } else {
5453                 if (IS_64BIT_PROCESS(current_proc())) {
5454                         struct user64_timeval tv[2];
5455                         error = copyin(usrtvp, (void *)tv, sizeof(tv));
5456                         if (error)
5457                                 return (error);
5458                         TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5459                         TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5460                 } else {
5461                         struct user32_timeval tv[2];
5462                         error = copyin(usrtvp, (void *)tv, sizeof(tv));
5463                         if (error)
5464                                 return (error);
5465                         TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5466                         TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5467                 }
5468         }
5469         return 0;
5470 }
5471
5472 static int
5473 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
5474         int nullflag)
5475 {
5476         int error;
5477         struct vnode_attr va;
5478         kauth_action_t action;
5479
5480         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5481
5482         VATTR_INIT(&va);
5483         VATTR_SET(&va, va_access_time, ts[0]);
5484         VATTR_SET(&va, va_modify_time, ts[1]);
5485         if (nullflag)
5486                 va.va_vaflags |= VA_UTIMES_NULL;
5487
5488 #if NAMEDSTREAMS
5489         /* utimes calls are not allowed for resource forks. */
5490         if (vp->v_flag & VISNAMEDSTREAM) {
5491                 error = EPERM;
5492                 goto out;
5493         }
5494 #endif
5495
5496 #if CONFIG_MACF
5497         error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
5498         if (error)
5499                 goto out;
5500 #endif
5501         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
5502                 if (!nullflag && error == EACCES)
5503                         error = EPERM;
5504                 goto out;
5505         }
5506
5507         /* since we may not need to auth anything, check here */
5508         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5509                 if (!nullflag && error == EACCES)
5510                         error = EPERM;
5511                 goto out;
5512         }
5513         error = vnode_setattr(vp, &va, ctx);
5514
5515 out:
5516         return error;
5517 }
5518
5519 /*
5520  * Set the access and modification times of a file.
5521  */
5522 /* ARGSUSED */
5523 int
5524 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
5525 {
5526         struct timespec ts[2];
5527         user_addr_t usrtvp;
5528         int error;
5529         struct nameidata nd;
5530         vfs_context_t ctx = vfs_context_current();
5531
5532         /*
5533          * AUDIT: Needed to change the order of operations to do the
5534          * name lookup first because auditing wants the path.
5535          */
5536         NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5537                 UIO_USERSPACE, uap->path, ctx);
5538         error = namei(&nd);
5539         if (error)
5540                 return (error);
5541         nameidone(&nd);
5542
5543         /*
5544          * Fetch the user-supplied time.  If usrtvp is USER_ADDR_NULL, we fetch
5545          * the current time instead.
5546          */
5547         usrtvp = uap->tptr;
5548         if ((error = getutimes(usrtvp, ts)) != 0)
5549                 goto out;
5550
5551         error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
5552
5553 out:
5554         vnode_put(nd.ni_vp);
5555         return (error);
5556 }
5557
5558 /*
5559  * Set the access and modification times of a file.
5560  */
5561 /* ARGSUSED */
5562 int
5563 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
5564 {
5565         struct timespec ts[2];
5566         vnode_t vp;
5567         user_addr_t usrtvp;
5568         int error;
5569
5570         AUDIT_ARG(fd, uap->fd);
5571         usrtvp = uap->tptr;
5572         if ((error = getutimes(usrtvp, ts)) != 0)
5573                 return (error);
5574         if ((error = file_vnode(uap->fd, &vp)) != 0)
5575                 return (error);
5576         if((error = vnode_getwithref(vp))) {
5577                 file_drop(uap->fd);
5578                 return(error);
5579         }
5580
5581         error =  setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
5582         vnode_put(vp);
5583         file_drop(uap->fd);
5584         return(error);
5585 }
5586
5587 /*
5588  * Truncate a file given its path name.
5589  */
5590 /* ARGSUSED */
5591 int
5592 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
5593 {
5594         vnode_t vp;
5595         struct vnode_attr va;
5596         vfs_context_t ctx = vfs_context_current();
5597         int error;
5598         struct nameidata nd;
5599         kauth_action_t action;
5600
5601         if (uap->length < 0)
5602                 return(EINVAL);
5603         NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
5604                 UIO_USERSPACE, uap->path, ctx);
5605         if ((error = namei(&nd)))
5606                 return (error);
5607         vp = nd.ni_vp;
5608
5609         nameidone(&nd);
5610
5611         VATTR_INIT(&va);
5612         VATTR_SET(&va, va_data_size, uap->length);
5613
5614 #if CONFIG_MACF
5615         error = mac_vnode_check_truncate(ctx, NOCRED, vp);
5616         if (error)
5617                 goto out;
5618 #endif
5619
5620         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5621                 goto out;
5622         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5623                 goto out;
5624         error = vnode_setattr(vp, &va, ctx);
5625 out:
5626         vnode_put(vp);
5627         return (error);
5628 }
5629
5630 /*
5631  * Truncate a file given a file descriptor.
5632  */
5633 /* ARGSUSED */
5634 int
5635 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
5636 {
5637         vfs_context_t ctx = vfs_context_current();
5638         struct vnode_attr va;
5639         vnode_t vp;
5640         struct fileproc *fp;
5641         int error ;
5642         int fd = uap->fd;
5643
5644         AUDIT_ARG(fd, uap->fd);
5645         if (uap->length < 0)
5646                 return(EINVAL);
5647
5648         if ( (error = fp_lookup(p,fd,&fp,0)) ) {
5649                 return(error);
5650         }
5651
5652         if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
5653                 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
5654                 goto out;
5655         }
5656         if (fp->f_fglob->fg_type != DTYPE_VNODE)  {
5657                 error = EINVAL;
5658                 goto out;
5659         }
5660
5661         vp = (vnode_t)fp->f_fglob->fg_data;
5662
5663         if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
5664                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5665                 error = EINVAL;
5666                 goto out;
5667         }
5668
5669         if ((error = vnode_getwithref(vp)) != 0) {
5670                 goto out;
5671         }
5672
5673         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5674
5675 #if CONFIG_MACF
5676         error = mac_vnode_check_truncate(ctx,
5677             fp->f_fglob->fg_cred, vp);
5678         if (error) {
5679                 (void)vnode_put(vp);
5680                 goto out;
5681         }
5682 #endif
5683         VATTR_INIT(&va);
5684         VATTR_SET(&va, va_data_size, uap->length);
5685         error = vnode_setattr(vp, &va, ctx);
5686         (void)vnode_put(vp);
5687 out:
5688         file_drop(fd);
5689         return (error);
5690 }
5691
5692
5693 /*
5694  * Sync an open file with synchronized I/O _file_ integrity completion
5695  */
5696 /* ARGSUSED */
5697 int
5698 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
5699 {
5700         __pthread_testcancel(1);
5701         return(fsync_common(p, uap, MNT_WAIT));
5702 }
5703
5704
5705 /*
5706  * Sync an open file with synchronized I/O _file_ integrity completion
5707  *
5708  * Notes:       This is a legacy support function that does not test for
5709  *              thread cancellation points.
5710  */
5711 /* ARGSUSED */
5712 int
5713 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
5714 {
5715         return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
5716 }
5717
5718
5719 /*
5720  * Sync an open file with synchronized I/O _data_ integrity completion
5721  */
5722 /* ARGSUSED */
5723 int
5724 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
5725 {
5726         __pthread_testcancel(1);
5727         return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
5728 }
5729
5730
5731 /*
5732  * fsync_common
5733  *
5734  * Common fsync code to support both synchronized I/O file integrity completion
5735  * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
5736  *
5737  * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
5738  * will only guarantee that the file data contents are retrievable.  If
5739  * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
5740  * includes additional metadata unnecessary for retrieving the file data
5741  * contents, such as atime, mtime, ctime, etc., also be committed to stable
5742  * storage.
5743  *
5744  * Parameters:  p                               The process
5745  *              uap->fd                         The descriptor to synchronize
5746  *              flags                           The data integrity flags
5747  *
5748  * Returns:     int                             Success
5749  *      fp_getfvp:EBADF                         Bad file descriptor
5750  *      fp_getfvp:ENOTSUP                       fd does not refer to a vnode
5751  *      VNOP_FSYNC:???                          unspecified
5752  *
5753  * Notes:       We use struct fsync_args because it is a short name, and all
5754  *              caller argument structures are otherwise identical.
5755  */
5756 static int
5757 fsync_common(proc_t p, struct fsync_args *uap, int flags)
5758 {
5759         vnode_t vp;
5760         struct fileproc *fp;
5761         vfs_context_t ctx = vfs_context_current();
5762         int error;
5763
5764         AUDIT_ARG(fd, uap->fd);
5765
5766         if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
5767                 return (error);
5768         if ( (error = vnode_getwithref(vp)) ) {
5769                 file_drop(uap->fd);
5770                 return(error);
5771         }
5772
5773         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5774
5775         error = VNOP_FSYNC(vp, flags, ctx);
5776
5777 #if NAMEDRSRCFORK
5778         /* Sync resource fork shadow file if necessary. */
5779         if ((error == 0) &&
5780             (vp->v_flag & VISNAMEDSTREAM) &&
5781             (vp->v_parent != NULLVP) &&
5782             vnode_isshadow(vp) &&
5783             (fp->f_flags & FP_WRITTEN)) {
5784                 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
5785         }
5786 #endif
5787
5788         (void)vnode_put(vp);
5789         file_drop(uap->fd);
5790         return (error);
5791 }
5792
5793 /*
5794  * Duplicate files.  Source must be a file, target must be a file or
5795  * must not exist.
5796  *
5797  * XXX Copyfile authorisation checking is woefully inadequate, and will not
5798  *     perform inheritance correctly.
5799  */
5800 /* ARGSUSED */
5801 int
5802 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
5803 {
5804         vnode_t tvp, fvp, tdvp, sdvp;
5805         struct nameidata fromnd, tond;
5806         int error;
5807         vfs_context_t ctx = vfs_context_current();
5808
5809         /* Check that the flags are valid. */
5810
5811         if (uap->flags & ~CPF_MASK) {
5812                 return(EINVAL);
5813         }
5814
5815         NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
5816                 UIO_USERSPACE, uap->from, ctx);
5817         if ((error = namei(&fromnd)))
5818                 return (error);
5819         fvp = fromnd.ni_vp;
5820
5821         NDINIT(&tond, CREATE, OP_LINK,
5822                LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
5823                UIO_USERSPACE, uap->to, ctx);
5824         if ((error = namei(&tond))) {
5825                 goto out1;
5826         }
5827         tdvp = tond.ni_dvp;
5828         tvp = tond.ni_vp;
5829
5830         if (tvp != NULL) {
5831                 if (!(uap->flags & CPF_OVERWRITE)) {
5832                         error = EEXIST;
5833                         goto out;
5834                 }
5835         }
5836         if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
5837                 error = EISDIR;
5838                 goto out;
5839         }
5840
5841         if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5842                 goto out;
5843
5844         if (fvp == tdvp)
5845                 error = EINVAL;
5846         /*
5847          * If source is the same as the destination (that is the
5848          * same inode number) then there is nothing to do.
5849          * (fixed to have POSIX semantics - CSM 3/2/98)
5850          */
5851         if (fvp == tvp)
5852                 error = -1;
5853         if (!error)
5854                 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
5855 out:
5856         sdvp = tond.ni_startdir;
5857         /*
5858          * nameidone has to happen before we vnode_put(tdvp)
5859          * since it may need to release the fs_nodelock on the tdvp
5860          */
5861         nameidone(&tond);
5862
5863         if (tvp)
5864                 vnode_put(tvp);
5865         vnode_put(tdvp);
5866         vnode_put(sdvp);
5867 out1:
5868         vnode_put(fvp);
5869
5870         if (fromnd.ni_startdir)
5871                 vnode_put(fromnd.ni_startdir);
5872         nameidone(&fromnd);
5873
5874         if (error == -1)
5875                 return (0);
5876         return (error);
5877 }
5878
5879
5880 /*
5881  * Rename files.  Source and destination must either both be directories,
5882  * or both not be directories.  If target is a directory, it must be empty.
5883  */
5884 /* ARGSUSED */
5885 int
5886 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
5887 {
5888         vnode_t tvp, tdvp;
5889         vnode_t fvp, fdvp;
5890         struct nameidata fromnd, tond;
5891         vfs_context_t ctx = vfs_context_current();
5892         int error;
5893         int do_retry;
5894         int mntrename;
5895         int need_event;
5896         const char *oname = NULL;
5897         char *from_name = NULL, *to_name = NULL;
5898         int from_len=0, to_len=0;
5899         int holding_mntlock;
5900         mount_t locked_mp = NULL;
5901         vnode_t oparent = NULLVP;
5902 #if CONFIG_FSE
5903         fse_info from_finfo, to_finfo;
5904         struct vnode_attr fva, tva;
5905 #endif
5906         int from_truncated=0, to_truncated;
5907         int batched = 0;
5908         struct vnode_attr *fvap, *tvap;
5909         int continuing = 0;
5910
5911         holding_mntlock = 0;
5912     do_retry = 0;
5913 retry:
5914         fvp = tvp = NULL;
5915         fdvp = tdvp = NULL;
5916         fvap = tvap = NULL;
5917         mntrename = FALSE;
5918
5919         NDINIT(&fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
5920                UIO_USERSPACE, uap->from, ctx);
5921         fromnd.ni_flag = NAMEI_COMPOUNDRENAME;
5922
5923         NDINIT(&tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
5924                UIO_USERSPACE, uap->to, ctx);
5925         tond.ni_flag = NAMEI_COMPOUNDRENAME;
5926
5927 continue_lookup:
5928         if ((fromnd.ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
5929                 if ( (error = namei(&fromnd)) )
5930                         goto out1;
5931                 fdvp = fromnd.ni_dvp;
5932                 fvp  = fromnd.ni_vp;
5933
5934                 if (fvp && fvp->v_type == VDIR)
5935                         tond.ni_cnd.cn_flags |= WILLBEDIR;
5936         }
5937
5938         if ((tond.ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
5939                 if ( (error = namei(&tond)) ) {
5940                         /*
5941                          * Translate error code for rename("dir1", "dir2/.").
5942                          */
5943                         if (error == EISDIR && fvp->v_type == VDIR)
5944                                 error = EINVAL;
5945                         goto out1;
5946                 }
5947                 tdvp = tond.ni_dvp;
5948                 tvp  = tond.ni_vp;
5949         }
5950
5951         batched = vnode_compound_rename_available(fdvp);
5952         if (!fvp) {
5953                 /*
5954                  * Claim: this check will never reject a valid rename.
5955                  * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
5956                  * Suppose fdvp and tdvp are not on the same mount.
5957                  * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem.  If fvp is the root,
5958                  *      then you can't move it to within another dir on the same mountpoint.
5959                  * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
5960                  *
5961                  * If this check passes, then we are safe to pass these vnodes to the same FS.
5962                  */
5963                 if (fdvp->v_mount != tdvp->v_mount) {
5964                         error = EXDEV;
5965                         goto out1;
5966                 }
5967                 goto skipped_lookup;
5968         }
5969
5970         if (!batched) {
5971                 error = vn_authorize_rename(fdvp, fvp, &fromnd.ni_cnd, tdvp, tvp, &tond.ni_cnd, ctx, NULL);
5972                 if (error) {
5973                         if (error == ENOENT) {
5974                                 /*
5975                                  * We encountered a race where after doing the namei, tvp stops
5976                                  * being valid. If so, simply re-drive the rename call from the
5977                                  * top.
5978                                  */
5979                                 do_retry = 1;
5980                         }
5981                         goto out1;
5982                 }
5983         }
5984
5985         /*
5986          * If the source and destination are the same (i.e. they're
5987          * links to the same vnode) and the target file system is
5988          * case sensitive, then there is nothing to do.
5989          *
5990          * XXX Come back to this.
5991          */
5992         if (fvp == tvp) {
5993                 int pathconf_val;
5994
5995                 /*
5996                  * Note: if _PC_CASE_SENSITIVE selector isn't supported,
5997                  * then assume that this file system is case sensitive.
5998                  */
5999                 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6000                     pathconf_val != 0) {
6001                         goto out1;
6002                 }
6003         }
6004
6005         /*
6006          * Allow the renaming of mount points.
6007          * - target must not exist
6008          * - target must reside in the same directory as source
6009          * - union mounts cannot be renamed
6010          * - "/" cannot be renamed
6011          *
6012          * XXX Handle this in VFS after a continued lookup (if we missed
6013          * in the cache to start off)
6014          */
6015         if ((fvp->v_flag & VROOT) &&
6016             (fvp->v_type == VDIR) &&
6017             (tvp == NULL)  &&
6018             (fvp->v_mountedhere == NULL)  &&
6019             (fdvp == tdvp)  &&
6020             ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
6021             (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
6022                 vnode_t coveredvp;
6023
6024                 /* switch fvp to the covered vnode */
6025                 coveredvp = fvp->v_mount->mnt_vnodecovered;
6026                 if ( (vnode_getwithref(coveredvp)) ) {
6027                         error = ENOENT;
6028                         goto out1;
6029                 }
6030                 vnode_put(fvp);
6031
6032                 fvp = coveredvp;
6033                 mntrename = TRUE;
6034         }
6035         /*
6036          * Check for cross-device rename.
6037          */
6038         if ((fvp->v_mount != tdvp->v_mount) ||
6039             (tvp && (fvp->v_mount != tvp->v_mount))) {
6040                 error = EXDEV;
6041                 goto out1;
6042         }
6043
6044         /*
6045          * If source is the same as the destination (that is the
6046          * same inode number) then there is nothing to do...
6047          * EXCEPT if the underlying file system supports case
6048          * insensitivity and is case preserving.  In this case
6049          * the file system needs to handle the special case of
6050          * getting the same vnode as target (fvp) and source (tvp).
6051          *
6052          * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6053          * and _PC_CASE_PRESERVING can have this exception, and they need to
6054          * handle the special case of getting the same vnode as target and
6055          * source.  NOTE: Then the target is unlocked going into vnop_rename,
6056          * so not to cause locking problems. There is a single reference on tvp.
6057          *
6058          * NOTE - that fvp == tvp also occurs if they are hard linked and
6059          * that correct behaviour then is just to return success without doing
6060          * anything.
6061          *
6062          * XXX filesystem should take care of this itself, perhaps...
6063          */
6064         if (fvp == tvp && fdvp == tdvp) {
6065                 if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
6066                     !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
6067                           fromnd.ni_cnd.cn_namelen)) {
6068                         goto out1;
6069                 }
6070         }
6071
6072         if (holding_mntlock && fvp->v_mount != locked_mp) {
6073                 /*
6074                  * we're holding a reference and lock
6075                  * on locked_mp, but it no longer matches
6076                  * what we want to do... so drop our hold
6077                  */
6078                 mount_unlock_renames(locked_mp);
6079                 mount_drop(locked_mp, 0);
6080                 holding_mntlock = 0;
6081         }
6082         if (tdvp != fdvp && fvp->v_type == VDIR) {
6083                 /*
6084                  * serialize renames that re-shape
6085                  * the tree... if holding_mntlock is
6086                  * set, then we're ready to go...
6087                  * otherwise we
6088                  * first need to drop the iocounts
6089                  * we picked up, second take the
6090                  * lock to serialize the access,
6091                  * then finally start the lookup
6092                  * process over with the lock held
6093                  */
6094                 if (!holding_mntlock) {
6095                         /*
6096                          * need to grab a reference on
6097                          * the mount point before we
6098                          * drop all the iocounts... once
6099                          * the iocounts are gone, the mount
6100                          * could follow
6101                          */
6102                         locked_mp = fvp->v_mount;
6103                         mount_ref(locked_mp, 0);
6104
6105                         /*
6106                          * nameidone has to happen before we vnode_put(tvp)
6107                          * since it may need to release the fs_nodelock on the tvp
6108                          */
6109                         nameidone(&tond);
6110
6111                         if (tvp)
6112                                 vnode_put(tvp);
6113                         vnode_put(tdvp);
6114
6115                         /*
6116                          * nameidone has to happen before we vnode_put(fdvp)
6117                          * since it may need to release the fs_nodelock on the fvp
6118                          */
6119                         nameidone(&fromnd);
6120
6121                         vnode_put(fvp);
6122                         vnode_put(fdvp);
6123
6124                         mount_lock_renames(locked_mp);
6125                         holding_mntlock = 1;
6126
6127                         goto retry;
6128                 }
6129         } else {
6130                 /*
6131                  * when we dropped the iocounts to take
6132                  * the lock, we allowed the identity of
6133                  * the various vnodes to change... if they did,
6134                  * we may no longer be dealing with a rename
6135                  * that reshapes the tree... once we're holding
6136                  * the iocounts, the vnodes can't change type
6137                  * so we're free to drop the lock at this point
6138                  * and continue on
6139                  */
6140                 if (holding_mntlock) {
6141                         mount_unlock_renames(locked_mp);
6142                         mount_drop(locked_mp, 0);
6143                         holding_mntlock = 0;
6144                 }
6145         }
6146
6147         // save these off so we can later verify that fvp is the same
6148         oname   = fvp->v_name;
6149         oparent = fvp->v_parent;
6150
6151 skipped_lookup:
6152 #if CONFIG_FSE
6153         need_event = need_fsevent(FSE_RENAME, fdvp);
6154         if (need_event) {
6155                 if (fvp) {
6156                         get_fse_info(fvp, &from_finfo, ctx);
6157                 } else {
6158                         error = vfs_get_notify_attributes(&fva);
6159                         if (error) {
6160                                 goto out1;
6161                         }
6162
6163                         fvap = &fva;
6164                 }
6165
6166                 if (tvp) {
6167                         get_fse_info(tvp, &to_finfo, ctx);
6168                 } else if (batched) {
6169                         error = vfs_get_notify_attributes(&tva);
6170                         if (error) {
6171                                 goto out1;
6172                         }
6173
6174                         tvap = &tva;
6175                 }
6176         }
6177 #else
6178         need_event = 0;
6179 #endif /* CONFIG_FSE */
6180
6181         if (need_event || kauth_authorize_fileop_has_listeners()) {
6182                 if (from_name == NULL) {
6183                         GET_PATH(from_name);
6184                         if (from_name == NULL) {
6185                                 error = ENOMEM;
6186                                 goto out1;
6187                         }
6188                 }
6189
6190                 from_len = safe_getpath(fdvp, fromnd.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
6191
6192                 if (to_name == NULL) {
6193                         GET_PATH(to_name);
6194                         if (to_name == NULL) {
6195                                 error = ENOMEM;
6196                                 goto out1;
6197                         }
6198                 }
6199
6200                 to_len = safe_getpath(tdvp, tond.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
6201         }
6202
6203         error = vn_rename(fdvp, &fvp, &fromnd.ni_cnd, fvap,
6204                             tdvp, &tvp, &tond.ni_cnd, tvap,
6205                             0, ctx);
6206
6207         if (holding_mntlock) {
6208                 /*
6209                  * we can drop our serialization
6210                  * lock now
6211                  */
6212                 mount_unlock_renames(locked_mp);
6213                 mount_drop(locked_mp, 0);
6214                 holding_mntlock = 0;
6215         }
6216         if (error) {
6217                 if (error == EKEEPLOOKING) {
6218                         if ((fromnd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
6219                                 if ((tond.ni_flag & NAMEI_CONTLOOKUP) == 0) {
6220                                         panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6221                                 }
6222                         }
6223
6224                         fromnd.ni_vp = fvp;
6225                         tond.ni_vp = tvp;
6226
6227                         goto continue_lookup;
6228                 }
6229
6230                 /*
6231                  * We may encounter a race in the VNOP where the destination didn't
6232                  * exist when we did the namei, but it does by the time we go and
6233                  * try to create the entry. In this case, we should re-drive this rename
6234                  * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
6235                  * but other filesystems susceptible to this race could return it, too.
6236                  */
6237                 if (error == ERECYCLE) {
6238                         do_retry = 1;
6239                 }
6240
6241                 goto out1;
6242         }
6243
6244         /* call out to allow 3rd party notification of rename.
6245          * Ignore result of kauth_authorize_fileop call.
6246          */
6247         kauth_authorize_fileop(vfs_context_ucred(ctx),
6248                         KAUTH_FILEOP_RENAME,
6249                         (uintptr_t)from_name, (uintptr_t)to_name);
6250
6251 #if CONFIG_FSE
6252         if (from_name != NULL && to_name != NULL) {
6253                 if (from_truncated || to_truncated) {
6254                         // set it here since only the from_finfo gets reported up to user space
6255                         from_finfo.mode |= FSE_TRUNCATED_PATH;
6256                 }
6257
6258                 if (tvap && tvp) {
6259                         vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
6260                 }
6261                 if (fvap) {
6262                         vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
6263                 }
6264
6265                 if (tvp) {
6266                         add_fsevent(FSE_RENAME, ctx,
6267                                     FSE_ARG_STRING, from_len, from_name,
6268                                     FSE_ARG_FINFO, &from_finfo,
6269                                     FSE_ARG_STRING, to_len, to_name,
6270                                     FSE_ARG_FINFO, &to_finfo,
6271                                     FSE_ARG_DONE);
6272                 } else {
6273                         add_fsevent(FSE_RENAME, ctx,
6274                                     FSE_ARG_STRING, from_len, from_name,
6275                                     FSE_ARG_FINFO, &from_finfo,
6276                                     FSE_ARG_STRING, to_len, to_name,
6277                                     FSE_ARG_DONE);
6278                 }
6279         }
6280 #endif /* CONFIG_FSE */
6281
6282         /*
6283          * update filesystem's mount point data
6284          */
6285         if (mntrename) {
6286                 char *cp, *pathend, *mpname;
6287                 char * tobuf;
6288                 struct mount *mp;
6289                 int maxlen;
6290                 size_t len = 0;
6291
6292                 mp = fvp->v_mountedhere;
6293
6294                 if (vfs_busy(mp, LK_NOWAIT)) {
6295                         error = EBUSY;
6296                         goto out1;
6297                 }
6298                 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
6299
6300                 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
6301                 if (!error) {
6302                         /* find current mount point prefix */
6303                         pathend = &mp->mnt_vfsstat.f_mntonname[0];
6304                         for (cp = pathend; *cp != '\0'; ++cp) {
6305                                 if (*cp == '/')
6306                                         pathend = cp + 1;
6307                         }
6308                         /* find last component of target name */
6309                         for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
6310                                 if (*cp == '/')
6311                                         mpname = cp + 1;
6312                         }
6313                         /* append name to prefix */
6314                         maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
6315                         bzero(pathend, maxlen);
6316                         strlcpy(pathend, mpname, maxlen);
6317                 }
6318                 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
6319
6320                 vfs_unbusy(mp);
6321         }
6322         /*
6323          * fix up name & parent pointers.  note that we first
6324          * check that fvp has the same name/parent pointers it
6325          * had before the rename call... this is a 'weak' check
6326          * at best...
6327          *
6328          * XXX oparent and oname may not be set in the compound vnop case
6329          */
6330         if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
6331                 int update_flags;
6332
6333                 update_flags = VNODE_UPDATE_NAME;
6334
6335                 if (fdvp != tdvp)
6336                         update_flags |= VNODE_UPDATE_PARENT;
6337
6338                 vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
6339         }
6340 out1:
6341         if (to_name != NULL) {
6342                 RELEASE_PATH(to_name);
6343                 to_name = NULL;
6344         }
6345         if (from_name != NULL) {
6346                 RELEASE_PATH(from_name);
6347                 from_name = NULL;
6348         }
6349         if (holding_mntlock) {
6350                 mount_unlock_renames(locked_mp);
6351                 mount_drop(locked_mp, 0);
6352                 holding_mntlock = 0;
6353         }
6354         if (tdvp) {
6355                 /*
6356                  * nameidone has to happen before we vnode_put(tdvp)
6357                  * since it may need to release the fs_nodelock on the tdvp
6358                  */
6359                 nameidone(&tond);
6360
6361                 if (tvp)
6362                         vnode_put(tvp);
6363                 vnode_put(tdvp);
6364         }
6365         if (fdvp) {
6366                 /*
6367                  * nameidone has to happen before we vnode_put(fdvp)
6368                  * since it may need to release the fs_nodelock on the fdvp
6369                  */
6370                 nameidone(&fromnd);
6371
6372                 if (fvp)
6373                         vnode_put(fvp);
6374                 vnode_put(fdvp);
6375         }
6376
6377         /*
6378          * If things changed after we did the namei, then we will re-drive
6379          * this rename call from the top.
6380          */
6381         if(do_retry) {
6382                 do_retry = 0;
6383                 goto retry;
6384         }
6385
6386         return (error);
6387 }
6388
6389 /*
6390  * Make a directory file.
6391  *
6392  * Returns:     0                       Success
6393  *              EEXIST
6394  *      namei:???
6395  *      vnode_authorize:???
6396  *      vn_create:???
6397  */
6398 /* ARGSUSED */
6399 static int
6400 mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
6401 {
6402         vnode_t vp, dvp;
6403         int error;
6404         int update_flags = 0;
6405         int batched;
6406         struct nameidata nd;
6407
6408         AUDIT_ARG(mode, vap->va_mode);
6409         NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE,
6410                path, ctx);
6411         nd.ni_cnd.cn_flags |= WILLBEDIR;
6412         nd.ni_flag = NAMEI_COMPOUNDMKDIR;
6413
6414 continue_lookup:
6415         error = namei(&nd);
6416         if (error)
6417                 return (error);
6418         dvp = nd.ni_dvp;
6419         vp = nd.ni_vp;
6420
6421         if (vp != NULL) {
6422                 error = EEXIST;
6423                 goto out;
6424         }
6425
6426         batched = vnode_compound_mkdir_available(dvp);
6427
6428         VATTR_SET(vap, va_type, VDIR);
6429
6430         /*
6431          * XXX
6432          * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
6433          * only get EXISTS or EISDIR for existing path components, and not that it could see
6434          * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
6435          * it will fail in a spurious  manner.  Need to figure out if this is valid behavior.
6436          */
6437         if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6438                 if (error == EACCES || error == EPERM) {
6439                         int error2;
6440
6441                         nameidone(&nd);
6442                         vnode_put(dvp);
6443                         dvp = NULLVP;
6444
6445                         /*
6446                          * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6447                          * rather than EACCESS if the target exists.
6448                          */
6449                         NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, UIO_USERSPACE,
6450                                         path, ctx);
6451                         error2 = namei(&nd);
6452                         if (error2) {
6453                                 goto out;
6454                         } else {
6455                                 vp = nd.ni_vp;
6456                                 error = EEXIST;
6457                                 goto out;
6458                         }
6459                 }
6460
6461                 goto out;
6462         }
6463
6464         /*
6465          * make the directory
6466          */
6467         if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6468                 if (error == EKEEPLOOKING) {
6469                         nd.ni_vp = vp;
6470                         goto continue_lookup;
6471                 }
6472
6473                 goto out;
6474         }
6475
6476         // Make sure the name & parent pointers are hooked up
6477         if (vp->v_name == NULL)
6478                 update_flags |= VNODE_UPDATE_NAME;
6479         if (vp->v_parent == NULLVP)
6480                 update_flags |= VNODE_UPDATE_PARENT;
6481
6482         if (update_flags)
6483                 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
6484
6485 #if CONFIG_FSE
6486         add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
6487 #endif
6488
6489 out:
6490         /*
6491          * nameidone has to happen before we vnode_put(dvp)
6492          * since it may need to release the fs_nodelock on the dvp
6493          */
6494         nameidone(&nd);
6495
6496         if (vp)
6497                 vnode_put(vp);
6498         if (dvp)
6499                 vnode_put(dvp);
6500
6501         return (error);
6502 }
6503
6504 /*
6505  * mkdir_extended: Create a directory; with extended security (ACL).
6506  *
6507  * Parameters:    p                       Process requesting to create the directory
6508  *                uap                     User argument descriptor (see below)
6509  *                retval                  (ignored)
6510  *
6511  * Indirect:      uap->path               Path of directory to create
6512  *                uap->mode               Access permissions to set
6513  *                uap->xsecurity          ACL to set
6514  *
6515  * Returns:        0                      Success
6516  *                !0                      Not success
6517  *
6518  */
6519 int
6520 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
6521 {
6522         int ciferror;
6523         kauth_filesec_t xsecdst;
6524         struct vnode_attr va;
6525
6526         AUDIT_ARG(owner, uap->uid, uap->gid);
6527
6528         xsecdst = NULL;
6529         if ((uap->xsecurity != USER_ADDR_NULL) &&
6530             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
6531                 return ciferror;
6532
6533         VATTR_INIT(&va);
6534         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6535         if (xsecdst != NULL)
6536                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6537
6538         ciferror = mkdir1(vfs_context_current(), uap->path, &va);
6539         if (xsecdst != NULL)
6540                 kauth_filesec_free(xsecdst);
6541         return ciferror;
6542 }
6543
6544 int
6545 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
6546 {
6547         struct vnode_attr va;
6548
6549         VATTR_INIT(&va);
6550         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6551
6552         return(mkdir1(vfs_context_current(), uap->path, &va));
6553 }
6554
6555 /*
6556  * Remove a directory file.
6557  */
6558 /* ARGSUSED */
6559 int
6560 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
6561 {
6562         vnode_t vp, dvp;
6563         int error;
6564         struct nameidata nd;
6565         char     *path = NULL;
6566         int       len=0;
6567         int has_listeners = 0;
6568         int need_event = 0;
6569         int truncated = 0;
6570         vfs_context_t ctx = vfs_context_current();
6571 #if CONFIG_FSE
6572         struct vnode_attr va;
6573 #endif /* CONFIG_FSE */
6574         struct vnode_attr *vap = NULL;
6575         int batched;
6576
6577         int restart_flag;
6578
6579         /*
6580          * This loop exists to restart rmdir in the unlikely case that two
6581          * processes are simultaneously trying to remove the same directory
6582          * containing orphaned appleDouble files.
6583          */
6584         do {
6585                 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
6586                        UIO_USERSPACE, uap->path, ctx);
6587                 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
6588 continue_lookup:
6589                 restart_flag = 0;
6590                 vap = NULL;
6591
6592                 error = namei(&nd);
6593                 if (error)
6594                         return (error);
6595
6596                 dvp = nd.ni_dvp;
6597                 vp = nd.ni_vp;
6598
6599                 if (vp) {
6600                         batched = vnode_compound_rmdir_available(vp);
6601
6602                         if (vp->v_flag & VROOT) {
6603                                 /*
6604                                  * The root of a mounted filesystem cannot be deleted.
6605                                  */
6606                                 error = EBUSY;
6607                                 goto out;
6608                         }
6609
6610                         /*
6611                          * Removed a check here; we used to abort if vp's vid
6612                          * was not the same as what we'd seen the last time around.
6613                          * I do not think that check was valid, because if we retry
6614                          * and all dirents are gone, the directory could legitimately
6615                          * be recycled but still be present in a situation where we would
6616                          * have had permission to delete.  Therefore, we won't make
6617                          * an effort to preserve that check now that we may not have a
6618                          * vp here.
6619                          */
6620
6621                         if (!batched) {
6622                                 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
6623                                 if (error) {
6624                                         goto out;
6625                                 }
6626                         }
6627                 } else {
6628                         batched = 1;
6629
6630                         if (!vnode_compound_rmdir_available(dvp)) {
6631                                 panic("No error, but no compound rmdir?");
6632                         }
6633                 }
6634
6635 #if CONFIG_FSE
6636                 fse_info  finfo;
6637
6638                 need_event = need_fsevent(FSE_DELETE, dvp);
6639                 if (need_event) {
6640                         if (!batched) {
6641                                 get_fse_info(vp, &finfo, ctx);
6642                         } else {
6643                                 error = vfs_get_notify_attributes(&va);
6644                                 if (error) {
6645                                         goto out;
6646                                 }
6647
6648                                 vap = &va;
6649                         }
6650                 }
6651 #endif
6652                 has_listeners = kauth_authorize_fileop_has_listeners();
6653                 if (need_event || has_listeners) {
6654                         if (path == NULL) {
6655                                 GET_PATH(path);
6656                                 if (path == NULL) {
6657                                         error = ENOMEM;
6658                                         goto out;
6659                                 }
6660                         }
6661
6662                         len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
6663 #if CONFIG_FSE
6664                         if (truncated) {
6665                                 finfo.mode |= FSE_TRUNCATED_PATH;
6666                         }
6667 #endif
6668                 }
6669
6670                 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
6671                 nd.ni_vp = vp;
6672                 if (vp == NULLVP) {
6673                         /* Couldn't find a vnode */
6674                         goto out;
6675                 }
6676
6677                 if (error == EKEEPLOOKING) {
6678                         goto continue_lookup;
6679                 }
6680
6681                 /*
6682                  * Special case to remove orphaned AppleDouble
6683                  * files. I don't like putting this in the kernel,
6684                  * but carbon does not like putting this in carbon either,
6685                  * so here we are.
6686                  */
6687                 if (error == ENOTEMPTY) {
6688                         error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
6689                         if (error == EBUSY) {
6690                                 goto out;
6691                         }
6692
6693
6694                         /*
6695                          * Assuming everything went well, we will try the RMDIR again
6696                          */
6697                         if (!error)
6698                                 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
6699                 }
6700
6701                 /*
6702                  * Call out to allow 3rd party notification of delete.
6703                  * Ignore result of kauth_authorize_fileop call.
6704                  */
6705                 if (!error) {
6706                         if (has_listeners) {
6707                                 kauth_authorize_fileop(vfs_context_ucred(ctx),
6708                                                 KAUTH_FILEOP_DELETE,
6709                                                 (uintptr_t)vp,
6710                                                 (uintptr_t)path);
6711                         }
6712
6713                         if (vp->v_flag & VISHARDLINK) {
6714                                 // see the comment in unlink1() about why we update
6715                                 // the parent of a hard link when it is removed
6716                                 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
6717                         }
6718
6719 #if CONFIG_FSE
6720                         if (need_event) {
6721                                 if (vap) {
6722                                         vnode_get_fse_info_from_vap(vp, &finfo, vap);
6723                                 }
6724                                 add_fsevent(FSE_DELETE, ctx,
6725                                                 FSE_ARG_STRING, len, path,
6726                                                 FSE_ARG_FINFO, &finfo,
6727                                                 FSE_ARG_DONE);
6728                         }
6729 #endif
6730                 }
6731
6732 out:
6733                 if (path != NULL) {
6734                         RELEASE_PATH(path);
6735                         path = NULL;
6736                 }
6737                 /*
6738                  * nameidone has to happen before we vnode_put(dvp)
6739                  * since it may need to release the fs_nodelock on the dvp
6740                  */
6741                 nameidone(&nd);
6742                 vnode_put(dvp);
6743
6744                 if (vp)
6745                         vnode_put(vp);
6746
6747                 if (restart_flag == 0) {
6748                         wakeup_one((caddr_t)vp);
6749                         return (error);
6750                 }
6751                 tsleep(vp, PVFS, "rm AD", 1);
6752
6753         } while (restart_flag != 0);
6754
6755         return (error);
6756
6757 }
6758
/*
 * Record length of a struct direntry carrying a name of name_len bytes:
 * sizeof(struct direntry) less (MAXPATHLEN - 1) bytes, plus the name
 * length, rounded up to the next multiple of 8.
 */
#define DIRENT64_LEN(name_len) \
        (((sizeof(struct direntry) - (MAXPATHLEN - 1)) + (name_len) + 7) & ~7)
6762
6763 static errno_t
6764 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
6765                 int *numdirent, vfs_context_t ctxp)
6766 {
6767         /* Check if fs natively supports VNODE_READDIR_EXTENDED */
6768         if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
6769                    ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0))  {
6770                 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
6771         } else {
6772                 size_t bufsize;
6773                 void * bufptr;
6774                 uio_t auio;
6775                 struct direntry entry64;
6776                 struct dirent *dep;
6777                 int bytesread;
6778                 int error;
6779
6780                 /*
6781                  * Our kernel buffer needs to be smaller since re-packing
6782                  * will expand each dirent.  The worse case (when the name
6783                  * length is 3) corresponds to a struct direntry size of 32
6784                  * bytes (8-byte aligned) and a struct dirent size of 12 bytes
6785                  * (4-byte aligned).  So having a buffer that is 3/8 the size
6786                  * will prevent us from reading more than we can pack.
6787                  *
6788                  * Since this buffer is wired memory, we will limit the
6789                  * buffer size to a maximum of 32K. We would really like to
6790                  * use 32K in the MIN(), but we use magic number 87371 to
6791                  * prevent uio_resid() * 3 / 8 from overflowing.
6792                  */
6793                 bufsize = 3 * MIN(uio_resid(uio), 87371) / 8;
6794                 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
6795                 if (bufptr == NULL) {
6796                         return ENOMEM;
6797                 }
6798
6799                 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
6800                 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
6801                 auio->uio_offset = uio->uio_offset;
6802
6803                 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
6804
6805                 dep = (struct dirent *)bufptr;
6806                 bytesread = bufsize - uio_resid(auio);
6807
6808                 /*
6809                  * Convert all the entries and copy them out to user's buffer.
6810                  */
6811                 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
6812                         /* Convert a dirent to a dirent64. */
6813                         entry64.d_ino = dep->d_ino;
6814                         entry64.d_seekoff = 0;
6815                         entry64.d_reclen = DIRENT64_LEN(dep->d_namlen);
6816                         entry64.d_namlen = dep->d_namlen;
6817                         entry64.d_type = dep->d_type;
6818                         bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1);
6819
6820                         /* Move to next entry. */
6821                         dep = (struct dirent *)((char *)dep + dep->d_reclen);
6822
6823                         /* Copy entry64 to user's buffer. */
6824                         error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio);
6825                 }
6826
6827                 /* Update the real offset using the offset we got from VNOP_READDIR. */
6828                 if (error == 0) {
6829                         uio->uio_offset = auio->uio_offset;
6830                 }
6831                 uio_free(auio);
6832                 FREE(bufptr, M_TEMP);
6833                 return (error);
6834         }
6835 }
6836
6837 /*
6838  * Read a block of directory entries in a file system independent format.
6839  */
6840 static int
6841 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
6842                      off_t *offset, int flags)
6843 {
6844         vnode_t vp;
6845         struct vfs_context context = *vfs_context_current();    /* local copy */
6846         struct fileproc *fp;
6847         uio_t auio;
6848         int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6849         off_t loff;
6850         int error, eofflag, numdirent;
6851         char uio_buf[ UIO_SIZEOF(1) ];
6852
6853         error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
6854         if (error) {
6855                 return (error);
6856         }
6857         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6858                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6859                 error = EBADF;
6860                 goto out;
6861         }
6862
6863 #if CONFIG_MACF
6864         error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
6865         if (error)
6866                 goto out;
6867 #endif
6868         if ( (error = vnode_getwithref(vp)) ) {
6869                 goto out;
6870         }
6871         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6872
6873 unionread:
6874         if (vp->v_type != VDIR) {
6875                 (void)vnode_put(vp);
6876                 error = EINVAL;
6877                 goto out;
6878         }
6879
6880 #if CONFIG_MACF
6881         error = mac_vnode_check_readdir(&context, vp);
6882         if (error != 0) {
6883                 (void)vnode_put(vp);
6884                 goto out;
6885         }
6886 #endif /* MAC */
6887
6888         loff = fp->f_fglob->fg_offset;
6889         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
6890         uio_addiov(auio, bufp, bufsize);
6891
6892         if (flags & VNODE_READDIR_EXTENDED) {
6893                 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
6894                 fp->f_fglob->fg_offset = uio_offset(auio);
6895         } else {
6896                 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
6897                 fp->f_fglob->fg_offset = uio_offset(auio);
6898         }
6899         if (error) {
6900                 (void)vnode_put(vp);
6901                 goto out;
6902         }
6903
6904         if ((user_ssize_t)bufsize == uio_resid(auio)){
6905                 if (union_dircheckp) {
6906                         error = union_dircheckp(&vp, fp, &context);
6907                         if (error == -1)
6908                                 goto unionread;
6909                         if (error)
6910                                 goto out;
6911                 }
6912
6913                 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) {
6914                         struct vnode *tvp = vp;
6915                         vp = vp->v_mount->mnt_vnodecovered;
6916                         vnode_getwithref(vp);
6917                         vnode_ref(vp);
6918                         fp->f_fglob->fg_data = (caddr_t) vp;
6919                         fp->f_fglob->fg_offset = 0;
6920                         vnode_rele(tvp);
6921                         vnode_put(tvp);
6922                         goto unionread;
6923                 }
6924         }
6925
6926         vnode_put(vp);
6927         if (offset) {
6928                 *offset = loff;
6929         }
6930
6931         *bytesread = bufsize - uio_resid(auio);
6932 out:
6933         file_drop(fd);
6934         return (error);
6935 }
6936
6937
6938 int
6939 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
6940 {
6941         off_t offset;
6942         ssize_t bytesread;
6943         int error;
6944
6945         AUDIT_ARG(fd, uap->fd);
6946         error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
6947
6948         if (error == 0) {
6949                 if (proc_is64bit(p)) {
6950                         user64_long_t base = (user64_long_t)offset;
6951                         error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
6952                 } else {
6953                         user32_long_t base = (user32_long_t)offset;
6954                         error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
6955                 }
6956                 *retval = bytesread;
6957         }
6958         return (error);
6959 }
6960
6961 int
6962 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
6963 {
6964         off_t offset;
6965         ssize_t bytesread;
6966         int error;
6967
6968         AUDIT_ARG(fd, uap->fd);
6969         error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
6970
6971         if (error == 0) {
6972                 *retval = bytesread;
6973                 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
6974         }
6975         return (error);
6976 }
6977
6978
6979 /*
6980  * Set the mode mask for creation of filesystem nodes.
6981  * XXX implement xsecurity
6982  */
6983 #define UMASK_NOXSECURITY        (void *)1      /* leave existing xsecurity alone */
6984 static int
6985 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
6986 {
6987         struct filedesc *fdp;
6988
6989         AUDIT_ARG(mask, newmask);
6990         proc_fdlock(p);
6991         fdp = p->p_fd;
6992         *retval = fdp->fd_cmask;
6993         fdp->fd_cmask = newmask & ALLPERMS;
6994         proc_fdunlock(p);
6995         return (0);
6996 }
6997
6998 /*
6999  * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7000  *
7001  * Parameters:    p                       Process requesting to set the umask
7002  *                uap                     User argument descriptor (see below)
7003  *                retval                  umask of the process (parameter p)
7004  *
7005  * Indirect:      uap->newmask            umask to set
7006  *                uap->xsecurity          ACL to set
7007  *
7008  * Returns:        0                      Success
7009  *                !0                      Not success
7010  *
7011  */
7012 int
7013 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
7014 {
7015         int ciferror;
7016         kauth_filesec_t xsecdst;
7017
7018         xsecdst = KAUTH_FILESEC_NONE;
7019         if (uap->xsecurity != USER_ADDR_NULL) {
7020                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7021                         return ciferror;
7022         } else {
7023                 xsecdst = KAUTH_FILESEC_NONE;
7024         }
7025
7026         ciferror = umask1(p, uap->newmask, xsecdst, retval);
7027
7028         if (xsecdst != KAUTH_FILESEC_NONE)
7029                 kauth_filesec_free(xsecdst);
7030         return ciferror;
7031 }
7032
7033 int
7034 umask(proc_t p, struct umask_args *uap, int32_t *retval)
7035 {
7036         return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7037 }
7038
7039 /*
7040  * Void all references to file by ripping underlying filesystem
7041  * away from vnode.
7042  */
7043 /* ARGSUSED */
7044 int
7045 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
7046 {
7047         vnode_t vp;
7048         struct vnode_attr va;
7049         vfs_context_t ctx = vfs_context_current();
7050         int error;
7051         struct nameidata nd;
7052
7053         NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
7054                uap->path, ctx);
7055         error = namei(&nd);
7056         if (error)
7057                 return (error);
7058         vp = nd.ni_vp;
7059
7060         nameidone(&nd);
7061
7062         if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
7063                 error = ENOTSUP;
7064                 goto out;
7065         }
7066
7067         if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
7068                 error = EBUSY;
7069                 goto out;
7070         }
7071
7072 #if CONFIG_MACF
7073         error = mac_vnode_check_revoke(ctx, vp);
7074         if (error)
7075                 goto out;
7076 #endif
7077
7078         VATTR_INIT(&va);
7079         VATTR_WANTED(&va, va_uid);
7080         if ((error = vnode_getattr(vp, &va, ctx)))
7081                 goto out;
7082         if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
7083             (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
7084                 goto out;
7085         if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
7086                 VNOP_REVOKE(vp, REVOKEALL, ctx);
7087 out:
7088         vnode_put(vp);
7089         return (error);
7090 }
7091
7092
7093 /*
7094  *  HFS/HFS PLUS SPECIFIC SYSTEM CALLS
7095  *  The following system calls are designed to support features
7096  *  which are specific to the HFS & HFS Plus volume formats
7097  */
7098
7099 #ifdef __APPLE_API_OBSOLETE
7100
7101 /************************************************/
7102 /* *** Following calls will be deleted soon *** */
7103 /************************************************/
7104
7105 /*
7106  * Make a complex file.  A complex file is one with multiple forks (data streams)
7107  */
7108 /* ARGSUSED */
7109 int
7110 mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused int32_t *retval)
7111 {
7112         return (ENOTSUP);
7113 }
7114
7115 /*
7116  * Extended stat call which returns volumeid and vnodeid as well as other info
7117  */
7118 /* ARGSUSED */
7119 int
7120 statv(__unused proc_t p,
7121           __unused struct statv_args *uap,
7122           __unused int32_t *retval)
7123 {
7124         return (ENOTSUP);       /*  We'll just return an error for now */
7125
7126 } /* end of statv system call */
7127
7128 /*
7129 * Extended lstat call which returns volumeid and vnodeid as well as other info
7130 */
7131 /* ARGSUSED */
7132 int
7133 lstatv(__unused proc_t p,
7134            __unused struct lstatv_args *uap,
7135            __unused int32_t *retval)
7136 {
7137        return (ENOTSUP);        /*  We'll just return an error for now */
7138 } /* end of lstatv system call */
7139
7140 /*
7141 * Extended fstat call which returns volumeid and vnodeid as well as other info
7142 */
7143 /* ARGSUSED */
7144 int
7145 fstatv(__unused proc_t p,
7146            __unused struct fstatv_args *uap,
7147            __unused int32_t *retval)
7148 {
7149        return (ENOTSUP);        /*  We'll just return an error for now */
7150 } /* end of fstatv system call */
7151
7152
7153 /************************************************/
7154 /* *** Preceding calls will be deleted soon *** */
7155 /************************************************/
7156
7157 #endif /* __APPLE_API_OBSOLETE */
7158
7159 /*
7160 * Obtain attribute information on objects in a directory while enumerating
7161 * the directory.  This call does not yet support union mounted directories.
7162 * TO DO
7163 *  1.union mounted directories.
7164 */
7165
7166 /* ARGSUSED */
7167 int
7168 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
7169 {
7170         vnode_t vp;
7171         struct fileproc *fp;
7172         uio_t auio = NULL;
7173         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7174         uint32_t count;
7175         uint32_t newstate;
7176         int error, eofflag;
7177         uint32_t loff;
7178         struct attrlist attributelist;
7179         vfs_context_t ctx = vfs_context_current();
7180         int fd = uap->fd;
7181         char uio_buf[ UIO_SIZEOF(1) ];
7182         kauth_action_t action;
7183
7184         AUDIT_ARG(fd, fd);
7185
7186         /* Get the attributes into kernel space */
7187         if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
7188                 return(error);
7189         }
7190         if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
7191                 return(error);
7192         }
7193         if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
7194                 return (error);
7195         }
7196         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7197                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7198                 error = EBADF;
7199                 goto out;
7200         }
7201
7202
7203 #if CONFIG_MACF
7204         error = mac_file_check_change_offset(vfs_context_ucred(ctx),
7205             fp->f_fglob);
7206         if (error)
7207                 goto out;
7208 #endif
7209
7210
7211         if ( (error = vnode_getwithref(vp)) )
7212                 goto out;
7213
7214         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7215
7216         if (vp->v_type != VDIR) {
7217                 (void)vnode_put(vp);
7218                 error = EINVAL;
7219                 goto out;
7220         }
7221
7222 #if CONFIG_MACF
7223         error = mac_vnode_check_readdir(ctx, vp);
7224         if (error != 0) {
7225                 (void)vnode_put(vp);
7226                 goto out;
7227         }
7228 #endif /* MAC */
7229
7230         /* set up the uio structure which will contain the users return buffer */
7231         loff = fp->f_fglob->fg_offset;
7232         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ,
7233             &uio_buf[0], sizeof(uio_buf));
7234         uio_addiov(auio, uap->buffer, uap->buffersize);
7235
7236         /*
7237          * If the only item requested is file names, we can let that past with
7238          * just LIST_DIRECTORY.  If they want any other attributes, that means
7239          * they need SEARCH as well.
7240          */
7241         action = KAUTH_VNODE_LIST_DIRECTORY;
7242         if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
7243             attributelist.fileattr || attributelist.dirattr)
7244                 action |= KAUTH_VNODE_SEARCH;
7245
7246         if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
7247
7248                 /* Believe it or not, uap->options only has 32-bits of valid
7249                  * info, so truncate before extending again */
7250                 error = VNOP_READDIRATTR(vp, &attributelist, auio,
7251                                          count,
7252                                          (u_long)(uint32_t)uap->options, &newstate, &eofflag,
7253                                          &count, ctx);
7254         }
7255         (void)vnode_put(vp);
7256
7257         if (error)
7258                 goto out;
7259         fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
7260
7261         if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
7262                 goto out;
7263         if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
7264                 goto out;
7265         if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
7266                 goto out;
7267
7268         *retval = eofflag;  /* similar to getdirentries */
7269         error = 0;
7270 out:
7271         file_drop(fd);
7272         return (error); /* return error earlier, an retval of 0 or 1 now */
7273
7274 } /* end of getdirentryattr system call */
7275
7276 /*
7277 * Exchange data between two files
7278 */
7279
7280 /* ARGSUSED */
7281 int
7282 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
7283 {
7284
7285         struct nameidata fnd, snd;
7286         vfs_context_t ctx = vfs_context_current();
7287         vnode_t fvp;
7288         vnode_t svp;
7289         int error;
7290         u_int32_t nameiflags;
7291         char *fpath = NULL;
7292         char *spath = NULL;
7293         int   flen=0, slen=0;
7294         int from_truncated=0, to_truncated=0;
7295 #if CONFIG_FSE
7296         fse_info f_finfo, s_finfo;
7297 #endif
7298
7299         nameiflags = 0;
7300         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7301
7302         NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
7303                UIO_USERSPACE, uap->path1, ctx);
7304
7305         error = namei(&fnd);
7306         if (error)
7307                 goto out2;
7308
7309         nameidone(&fnd);
7310         fvp = fnd.ni_vp;
7311
7312         NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
7313                UIO_USERSPACE, uap->path2, ctx);
7314
7315         error = namei(&snd);
7316         if (error) {
7317                 vnode_put(fvp);
7318                 goto out2;
7319         }
7320         nameidone(&snd);
7321         svp = snd.ni_vp;
7322
7323         /*
7324          * if the files are the same, return an inval error
7325          */
7326         if (svp == fvp) {
7327                 error = EINVAL;
7328                 goto out;
7329         }
7330
7331         /*
7332          * if the files are on different volumes, return an error
7333          */
7334         if (svp->v_mount != fvp->v_mount) {
7335                 error = EXDEV;
7336                 goto out;
7337         }
7338
7339 #if CONFIG_MACF
7340         error = mac_vnode_check_exchangedata(ctx,
7341             fvp, svp);
7342         if (error)
7343                 goto out;
7344 #endif
7345         if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
7346             ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
7347                 goto out;
7348
7349         if (
7350 #if CONFIG_FSE
7351         need_fsevent(FSE_EXCHANGE, fvp) ||
7352 #endif
7353         kauth_authorize_fileop_has_listeners()) {
7354                 GET_PATH(fpath);
7355                 GET_PATH(spath);
7356                 if (fpath == NULL || spath == NULL) {
7357                         error = ENOMEM;
7358                         goto out;
7359                 }
7360
7361                 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
7362                 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
7363
7364 #if CONFIG_FSE
7365                 get_fse_info(fvp, &f_finfo, ctx);
7366                 get_fse_info(svp, &s_finfo, ctx);
7367                 if (from_truncated || to_truncated) {
7368                         // set it here since only the f_finfo gets reported up to user space
7369                         f_finfo.mode |= FSE_TRUNCATED_PATH;
7370                 }
7371 #endif
7372         }
7373         /* Ok, make the call */
7374         error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
7375
7376         if (error == 0) {
7377             const char *tmpname;
7378
7379             if (fpath != NULL && spath != NULL) {
7380                     /* call out to allow 3rd party notification of exchangedata.
7381                      * Ignore result of kauth_authorize_fileop call.
7382                      */
7383                     kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
7384                                            (uintptr_t)fpath, (uintptr_t)spath);
7385             }
7386             name_cache_lock();
7387
7388             tmpname     = fvp->v_name;
7389             fvp->v_name = svp->v_name;
7390             svp->v_name = tmpname;
7391
7392             if (fvp->v_parent != svp->v_parent) {
7393                 vnode_t tmp;
7394
7395                 tmp           = fvp->v_parent;
7396                 fvp->v_parent = svp->v_parent;
7397                 svp->v_parent = tmp;
7398             }
7399             name_cache_unlock();
7400
7401 #if CONFIG_FSE
7402             if (fpath != NULL && spath != NULL) {
7403                     add_fsevent(FSE_EXCHANGE, ctx,
7404                                 FSE_ARG_STRING, flen, fpath,
7405                                 FSE_ARG_FINFO, &f_finfo,
7406                                 FSE_ARG_STRING, slen, spath,
7407                                 FSE_ARG_FINFO, &s_finfo,
7408                                 FSE_ARG_DONE);
7409             }
7410 #endif
7411         }
7412
7413 out:
7414         if (fpath != NULL)
7415                 RELEASE_PATH(fpath);
7416         if (spath != NULL)
7417                 RELEASE_PATH(spath);
7418         vnode_put(svp);
7419         vnode_put(fvp);
7420 out2:
7421         return (error);
7422 }
7423
7424
7425 /* ARGSUSED */
7426
7427 int
7428 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
7429 {
7430         vnode_t vp;
7431         int error=0;
7432         int fserror = 0;
7433         struct nameidata nd;
7434         struct user64_fssearchblock searchblock;
7435         struct searchstate *state;
7436         struct attrlist *returnattrs;
7437         struct timeval timelimit;
7438         void *searchparams1,*searchparams2;
7439         uio_t auio = NULL;
7440         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7441         uint32_t nummatches;
7442         int mallocsize;
7443         uint32_t nameiflags;
7444         vfs_context_t ctx = vfs_context_current();
7445         char uio_buf[ UIO_SIZEOF(1) ];
7446
7447         /* Start by copying in fsearchblock paramater list */
7448     if (IS_64BIT_PROCESS(p)) {
7449         error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
7450         timelimit.tv_sec = searchblock.timelimit.tv_sec;
7451         timelimit.tv_usec = searchblock.timelimit.tv_usec;
7452     }
7453     else {
7454         struct user32_fssearchblock tmp_searchblock;
7455
7456         error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
7457         // munge into 64-bit version
7458         searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
7459         searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
7460         searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
7461         searchblock.maxmatches = tmp_searchblock.maxmatches;
7462                 /*
7463                  * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
7464                  * from a 32 bit long, and tv_usec is already a signed 32 bit int.
7465                  */
7466         timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
7467         timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
7468         searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
7469         searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
7470         searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
7471         searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
7472         searchblock.searchattrs = tmp_searchblock.searchattrs;
7473     }
7474         if (error)
7475                 return(error);
7476
7477         /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
7478          */
7479         if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
7480                 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
7481                 return(EINVAL);
7482
7483         /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
7484         /* It all has to do into local memory and it's not that big so we might as well  put it all together. */
7485         /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
7486         /* block.                                                                                             */
7487
7488         mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
7489                       sizeof(struct attrlist) + sizeof(struct searchstate);
7490
7491         MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
7492
7493         /* Now set up the various pointers to the correct place in our newly allocated memory */
7494
7495         searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
7496         returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
7497         state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
7498
7499         /* Now copy in the stuff given our local variables. */
7500
7501         if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
7502                 goto freeandexit;
7503
7504         if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
7505                 goto freeandexit;
7506
7507         if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
7508                 goto freeandexit;
7509
7510         if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
7511                 goto freeandexit;
7512
7513
7514         /*
7515          * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
7516          * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
7517          * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
7518          * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
7519          * validate the user-supplied data offset of the attrreference_t, we'll do it here.
7520          */
7521
7522         if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
7523                 attrreference_t* string_ref;
7524                 u_int32_t* start_length;
7525                 user64_size_t param_length;
7526
7527                 /* validate searchparams1 */
7528                 param_length = searchblock.sizeofsearchparams1;
7529                 /* skip the word that specifies length of the buffer */
7530                 start_length= (u_int32_t*) searchparams1;
7531                 start_length= start_length+1;
7532                 string_ref= (attrreference_t*) start_length;
7533
7534                 /* ensure no negative offsets or too big offsets */
7535                 if (string_ref->attr_dataoffset < 0 ) {
7536                         error = EINVAL;
7537                         goto freeandexit;
7538                 }
7539                 if (string_ref->attr_length > MAXPATHLEN) {
7540                         error = EINVAL;
7541                         goto freeandexit;
7542                 }
7543
7544                 /* Check for pointer overflow in the string ref */
7545                 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
7546                         error = EINVAL;
7547                         goto freeandexit;
7548                 }
7549
7550                 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
7551                         error = EINVAL;
7552                         goto freeandexit;
7553                 }
7554                 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
7555                         error = EINVAL;
7556                         goto freeandexit;
7557                 }
7558         }
7559
7560         /* set up the uio structure which will contain the users return buffer */
7561         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
7562                                                                   &uio_buf[0], sizeof(uio_buf));
7563     uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
7564
7565         nameiflags = 0;
7566         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7567         NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
7568                UIO_USERSPACE, uap->path, ctx);
7569
7570         error = namei(&nd);
7571         if (error)
7572                 goto freeandexit;
7573
7574         nameidone(&nd);
7575         vp = nd.ni_vp;
7576
7577 #if CONFIG_MACF
7578         error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
7579         if (error) {
7580                 vnode_put(vp);
7581                 goto freeandexit;
7582         }
7583 #endif
7584
7585
7586         /*
7587          * If searchblock.maxmatches == 0, then skip the search. This has happened
7588          * before and sometimes the underlyning code doesnt deal with it well.
7589          */
7590          if (searchblock.maxmatches == 0) {
7591                 nummatches = 0;
7592                 goto saveandexit;
7593          }
7594
7595         /*
7596            Allright, we have everything we need, so lets make that call.
7597
7598            We keep special track of the return value from the file system:
7599            EAGAIN is an acceptable error condition that shouldn't keep us
7600            from copying out any results...
7601          */
7602
7603         fserror = VNOP_SEARCHFS(vp,
7604                                                         searchparams1,
7605                                                         searchparams2,
7606                                                         &searchblock.searchattrs,
7607                                                         (u_long)searchblock.maxmatches,
7608                                                         &timelimit,
7609                                                         returnattrs,
7610                                                         &nummatches,
7611                                                         (u_long)uap->scriptcode,
7612                                                         (u_long)uap->options,
7613                                                         auio,
7614                                                         state,
7615                                                         ctx);
7616
7617 saveandexit:
7618
7619         vnode_put(vp);
7620
7621         /* Now copy out the stuff that needs copying out. That means the number of matches, the
7622            search state.  Everything was already put into he return buffer by the vop call. */
7623
7624         if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
7625                 goto freeandexit;
7626
7627     if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
7628                 goto freeandexit;
7629
7630         error = fserror;
7631
7632 freeandexit:
7633
7634         FREE(searchparams1,M_TEMP);
7635
7636         return(error);
7637
7638
7639 } /* end of searchfs system call */
7640
7641
7642
/*
 * Lock plumbing for the namespace-handler machinery.  The attribute and
 * group objects are allocated once by nspace_handler_init() and are used
 * to initialize the two mutexes below.
 */
lck_grp_attr_t *  nspace_group_attr;
lck_attr_t *      nspace_lock_attr;
lck_grp_t *       nspace_mutex_group;

/* held while the nspace_items[] table is walked or modified */
lck_mtx_t         nspace_handler_lock;
/* held while the per-handler handler_busy flag is tested or changed */
lck_mtx_t         nspace_handler_exclusion_lock;

/*
 * While this is 0 or ~0 the snapshot handler is refused with EINVAL.
 * It is reset to 0 when the snapshot handler process exits.
 */
time_t snapshot_timestamp=0;
/* when non-zero, snapshot events on MNTK_VIRTUALDEV mounts are not skipped */
int nspace_allow_virtual_devs=0;

void nspace_handler_init(void);
7654
/*
 * One slot of the nspace_items[] table: a pending request from a thread
 * that is waiting for a namespace handler to act on a vnode.
 */
typedef struct nspace_item_info {
        struct vnode *vp;       /* vnode the event is about; NULL once the slot is released/unblocked */
        void         *arg;      /* caller-supplied argument; read as a uio_t when offset/length are reported */
        uint64_t      op;       /* operation value supplied by the requester; copied out to the handler */
        uint32_t      vid;      /* vnode_vid(vp) at enqueue time, used with vnode_getwithvid() */
        uint32_t      flags;    /* NSPACE_ITEM_* bits; 0 marks the slot as free */
        uint32_t      token;    /* assigned from nspace_token_id when a handler picks the item up */
        uint32_t      refcount; /* number of threads currently waiting on this slot */
} nspace_item_info;
7664
/* fixed-size table of pending namespace events */
#define MAX_NSPACE_ITEMS   128
nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
uint32_t      nspace_item_idx=0;              // also used as the sleep/wakeup rendezvous address
uint32_t      nspace_token_id=0;              // last token handed out; its address is also slept on while waiting for a free slot
uint32_t      nspace_handler_timeout = 15;    // seconds

/* item state bits (nspace_item_info.flags) */
#define NSPACE_ITEM_NEW         0x0001        /* queued, not yet picked up by a handler */
#define NSPACE_ITEM_PROCESSING  0x0002        /* a handler has picked the item up */
#define NSPACE_ITEM_DEAD        0x0004        /* NOTE(review): not referenced in this part of the file */
#define NSPACE_ITEM_CANCELLED   0x0008        /* waiter returns the item's token value as its error */
#define NSPACE_ITEM_DONE        0x0010        /* waiter returns 0 */
#define NSPACE_ITEM_RESET_TIMER 0x0020        /* on timeout the waiter clears this bit and waits again */

/* item event-type bits (also stored in nspace_item_info.flags) */
#define NSPACE_ITEM_NSPACE_EVENT   0x0040
#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
#define NSPACE_ITEM_TRACK_EVENT    0x0100

#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT | NSPACE_ITEM_TRACK_EVENT)
7683
7684 //#pragma optimization_level 0
7685
/*
 * Kinds of namespace handler.  The values double as indices into
 * nspace_handlers[], which is sized by NSPACE_HANDLER_COUNT.
 */
typedef enum {
        NSPACE_HANDLER_NSPACE = 0,
        NSPACE_HANDLER_SNAPSHOT = 1,
        NSPACE_HANDLER_TRACK = 2,

        NSPACE_HANDLER_COUNT,   /* number of handler kinds; not a valid kind itself */
} nspace_type_t;
7693
/* Registration record for one kind of namespace handler. */
typedef struct {
        uint64_t handler_tid;           /* thread id recorded when the handler registered; 0 after it exits */
        struct proc *handler_proc;      /* registered handler process; NULL when there is none */
        int handler_busy;               /* non-zero while a thread is inside wait_for_namespace_event() */
} nspace_handler_t;

/* one record per nspace_type_t value */
nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
7701
7702 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
7703 {
7704         switch(nspace_type) {
7705                 case NSPACE_HANDLER_NSPACE:
7706                         return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
7707                 case NSPACE_HANDLER_SNAPSHOT:
7708                         return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
7709                 case NSPACE_HANDLER_TRACK:
7710                         return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_TRACK_EVENT;
7711                 default:
7712                         printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
7713                         return 0;
7714         }
7715 }
7716
7717 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
7718 {
7719         switch(nspace_type) {
7720                 case NSPACE_HANDLER_NSPACE:
7721                         return NSPACE_ITEM_NSPACE_EVENT;
7722                 case NSPACE_HANDLER_SNAPSHOT:
7723                         return NSPACE_ITEM_SNAPSHOT_EVENT;
7724                 case NSPACE_HANDLER_TRACK:
7725                         return NSPACE_ITEM_TRACK_EVENT;
7726                 default:
7727                         printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
7728                         return 0;
7729         }
7730 }
7731
7732 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
7733 {
7734         switch(nspace_type) {
7735                 case NSPACE_HANDLER_NSPACE:
7736                         return FREAD | FWRITE | O_EVTONLY;
7737                 case NSPACE_HANDLER_SNAPSHOT:
7738                 case NSPACE_HANDLER_TRACK:
7739                         return FREAD | O_EVTONLY;
7740                 default:
7741                         printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
7742                         return 0;
7743         }
7744 }
7745
7746 static inline nspace_type_t nspace_type_for_op(uint64_t op)
7747 {
7748         switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
7749                 case NAMESPACE_HANDLER_NSPACE_EVENT:
7750                         return NSPACE_HANDLER_NSPACE;
7751                 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
7752                         return NSPACE_HANDLER_SNAPSHOT;
7753                 case NAMESPACE_HANDLER_TRACK_EVENT:
7754                         return NSPACE_HANDLER_TRACK;
7755                 default:
7756                         printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
7757                         return NSPACE_HANDLER_NSPACE;
7758         }
7759 }
7760
7761 static inline int nspace_is_special_process(struct proc *proc)
7762 {
7763         int i;
7764         for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
7765                 if (proc == nspace_handlers[i].handler_proc)
7766                         return 1;
7767         }
7768         return 0;
7769 }
7770
7771 void
7772 nspace_handler_init(void)
7773 {
7774         nspace_lock_attr    = lck_attr_alloc_init();
7775         nspace_group_attr   = lck_grp_attr_alloc_init();
7776         nspace_mutex_group  = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
7777         lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
7778         lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
7779         memset(&nspace_items[0], 0, sizeof(nspace_items));
7780 }
7781
7782 void
7783 nspace_proc_exit(struct proc *p)
7784 {
7785         int i, event_mask = 0;
7786
7787         for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
7788                 if (p == nspace_handlers[i].handler_proc) {
7789                         event_mask |= nspace_item_flags_for_type(i);
7790                         nspace_handlers[i].handler_tid = 0;
7791                         nspace_handlers[i].handler_proc = NULL;
7792                 }
7793         }
7794
7795         if (event_mask == 0) {
7796                 return;
7797         }
7798
7799         if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
7800                 // if this process was the snapshot handler, zero snapshot_timeout
7801                 snapshot_timestamp = 0;
7802         }
7803
7804         //
7805         // unblock anyone that's waiting for the handler that died
7806         //
7807         lck_mtx_lock(&nspace_handler_lock);
7808         for(i=0; i < MAX_NSPACE_ITEMS; i++) {
7809                 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
7810
7811                         if ( nspace_items[i].flags & event_mask ) {
7812
7813                                 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
7814                                         vnode_lock_spin(nspace_items[i].vp);
7815                                         nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
7816                                         vnode_unlock(nspace_items[i].vp);
7817                                 }
7818                                 nspace_items[i].vp = NULL;
7819                                 nspace_items[i].vid = 0;
7820                                 nspace_items[i].flags = NSPACE_ITEM_DONE;
7821                                 nspace_items[i].token = 0;
7822
7823                                 wakeup((caddr_t)&(nspace_items[i].vp));
7824                         }
7825                 }
7826         }
7827
7828         wakeup((caddr_t)&nspace_item_idx);
7829         lck_mtx_unlock(&nspace_handler_lock);
7830 }
7831
7832
/*
 * Convenience form of resolve_nspace_item_ext() for callers that have no
 * extra argument to pass along with the event.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
        void *no_arg = NULL;

        return resolve_nspace_item_ext(vp, op, no_arg);
}
7838
/*
 * Queue a namespace event for vp and block until the matching handler
 * deals with it.
 *
 * vp   vnode the event concerns
 * op   operation value; its event-type bits select the handler type
 * arg  opaque value stored with the item (may be NULL)
 *
 * Returns 0 without waiting when the vnode type is not VREG/VDIR/VLNK,
 * when a snapshot event targets a MNTK_VIRTUALDEV mount (unless
 * nspace_allow_virtual_devs is set), or when no handler process is
 * registered for the event type.  Returns EDEADLK when the caller is
 * itself a handler process.  Otherwise returns 0 once the item is marked
 * NSPACE_ITEM_DONE, the item's token value when it is marked
 * NSPACE_ITEM_CANCELLED, ETIMEDOUT when the wait times out, or whatever
 * error msleep() reported.
 */
int
resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
{
        int i, error, keep_waiting;
        struct timespec ts;
        nspace_type_t nspace_type = nspace_type_for_op(op);

        // only allow namespace events on regular files, directories and symlinks.
        if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
                return 0;
        }

        //
        // if this is a snapshot event and the vnode is on a
        // disk image just pretend nothing happened since any
        // change to the disk image will cause the disk image
        // itself to get backed up and this avoids multi-way
        // deadlocks between the snapshot handler and the ever
        // popular diskimages-helper process.  the variable
        // nspace_allow_virtual_devs allows this behavior to
        // be overridden (for use by the Mobile TimeMachine
        // testing infrastructure which uses disk images)
        //
        if (   (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
            && (vp->v_mount != NULL)
            && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
            && !nspace_allow_virtual_devs) {

                return 0;
        }

        // no handler is registered for this event type, so there is
        // nobody to wait for.
        if (nspace_handlers[nspace_type].handler_proc == NULL) {
                return 0;
        }

        // a handler process must not block waiting on a handler.
        if (nspace_is_special_process(current_proc())) {
                return EDEADLK;
        }

        lck_mtx_lock(&nspace_handler_lock);

retry:
        // first look for an existing item for the same vnode and operation
        for(i=0; i < MAX_NSPACE_ITEMS; i++) {
                if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
                        break;
                }
        }

        if (i >= MAX_NSPACE_ITEMS) {
                // none found: look for a free slot (flags == 0) instead
                for(i=0; i < MAX_NSPACE_ITEMS; i++) {
                        if (nspace_items[i].flags == 0) {
                                break;
                        }
                }
        } else {
                // piggy-back on the existing item as one more waiter
                nspace_items[i].refcount++;
        }

        if (i >= MAX_NSPACE_ITEMS) {
                // the table is full: sleep until a waiter releases its slot
                // (waiters do wakeup(&nspace_token_id) when they finish) or
                // the timeout expires.
                ts.tv_sec = nspace_handler_timeout;
                ts.tv_nsec = 0;

                error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
                if (error == 0) {
                        // an entry got free'd up, go see if we can get a slot
                        goto retry;
                } else {
                        lck_mtx_unlock(&nspace_handler_lock);
                        return error;
                }
        }

        //
        // if it didn't already exist, add it.  if it did exist
        // we'll get woken up when someone does a wakeup() on
        // the slot in the nspace_items table.
        //
        if (vp != nspace_items[i].vp) {
                nspace_items[i].vp = vp;
                nspace_items[i].arg = arg;
                nspace_items[i].op = op;
                nspace_items[i].vid = vnode_vid(vp);
                nspace_items[i].flags = NSPACE_ITEM_NEW;
                nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
                if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
                        // snapshot events that carry an argument tag the vnode
                        if (arg) {
                                vnode_lock_spin(vp);
                                vp->v_flag |= VNEEDSSNAPSHOT;
                                vnode_unlock(vp);
                        }
                }

                nspace_items[i].token = 0;
                nspace_items[i].refcount = 1;

                // handlers sleep on &nspace_item_idx while waiting for work
                wakeup((caddr_t)&nspace_item_idx);
        }

        //
        // Now go to sleep until the handler does a wakeup on this
        // slot in the nspace_items table (or we timeout).
        //
        keep_waiting = 1;
        while(keep_waiting) {
                ts.tv_sec = nspace_handler_timeout;
                ts.tv_nsec = 0;
                error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);

                if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
                        error = 0;
                } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
                        // the token field is returned as the error value here;
                        // presumably the canceller stores an errno in it (TODO confirm)
                        error = nspace_items[i].token;
                } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
                        if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
                                // a timer reset was requested: consume it and wait again
                                nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
                                continue;
                        } else {
                                error = ETIMEDOUT;
                        }
                } else if (error == 0) {
                        // hmmm, why did we get woken up?
                        printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
                               nspace_items[i].token);
                }

                // drop our reference; the last waiter out recycles the slot
                if (--nspace_items[i].refcount == 0) {
                        nspace_items[i].vp = NULL;     // clear this so that no one will match on it again
                        nspace_items[i].arg = NULL;
                        nspace_items[i].token = 0;     // clear this so that the handler will not find it anymore
                        nspace_items[i].flags = 0;     // this clears it for re-use
                }
                // let threads waiting for a free slot re-scan the table
                wakeup(&nspace_token_id);
                keep_waiting = 0;
        }

        lck_mtx_unlock(&nspace_handler_lock);

        return error;
}
7979
7980
7981 int
7982 get_nspace_item_status(struct vnode *vp, int32_t *status)
7983 {
7984         int i;
7985
7986         lck_mtx_lock(&nspace_handler_lock);
7987         for(i=0; i < MAX_NSPACE_ITEMS; i++) {
7988                 if (nspace_items[i].vp == vp) {
7989                         break;
7990                 }
7991         }
7992
7993         if (i >= MAX_NSPACE_ITEMS) {
7994                 lck_mtx_unlock(&nspace_handler_lock);
7995                 return ENOENT;
7996         }
7997
7998         *status = nspace_items[i].flags;
7999         lck_mtx_unlock(&nspace_handler_lock);
8000         return 0;
8001 }
8002
8003
#if 0
/*
 * Compiled out (#if 0).
 *
 * Format a "/.vol/<fsid>/<fileid>" path for vp into path.  On entry *len
 * is the size of the buffer; on return it is snprintf()'s result plus one.
 * Returns 0 on success, or -1 (with a placeholder path written) when
 * vnode_getattr() fails.
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
        struct vnode_attr va;
        int ret;

        VATTR_INIT(&va);
        VATTR_WANTED(&va, va_fsid);
        VATTR_WANTED(&va, va_fileid);

        if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
                *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
                ret = -1;
        } else {
                *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
                ret = 0;
        }

        return ret;
}
#endif
8026
8027 //
8028 // Note: this function does NOT check permissions on all of the
8029 // parent directories leading to this vnode.  It should only be
8030 // called on behalf of a root process.  Otherwise a process may
8031 // get access to a file because the file itself is readable even
8032 // though its parent directories would prevent access.
8033 //
8034 static int
8035 vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
8036 {
8037         int error, action;
8038
8039         if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8040                 return error;
8041         }
8042
8043 #if CONFIG_MACF
8044         error = mac_vnode_check_open(ctx, vp, fmode);
8045         if (error)
8046                 return error;
8047 #endif
8048
8049         /* compute action to be authorized */
8050         action = 0;
8051         if (fmode & FREAD) {
8052                 action |= KAUTH_VNODE_READ_DATA;
8053         }
8054         if (fmode & (FWRITE | O_TRUNC)) {
8055                 /*
8056                  * If we are writing, appending, and not truncating,
8057                  * indicate that we are appending so that if the
8058                  * UF_APPEND or SF_APPEND bits are set, we do not deny
8059                  * the open.
8060                  */
8061                 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
8062                         action |= KAUTH_VNODE_APPEND_DATA;
8063                 } else {
8064                         action |= KAUTH_VNODE_WRITE_DATA;
8065                 }
8066         }
8067
8068         if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
8069                 return error;
8070
8071
8072         //
8073         // if the vnode is tagged VOPENEVT and the current process
8074         // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8075         // flag to the open mode so that this open won't count against
8076         // the vnode when carbon delete() does a vnode_isinuse() to see
8077         // if a file is currently in use.  this allows spotlight
8078         // importers to not interfere with carbon apps that depend on
8079         // the no-delete-if-busy semantics of carbon delete().
8080         //
8081         if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
8082                 fmode |= O_EVTONLY;
8083         }
8084
8085         if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
8086                 return error;
8087         }
8088         if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
8089                 VNOP_CLOSE(vp, fmode, ctx);
8090                 return error;
8091         }
8092
8093         /* call out to allow 3rd party notification of open.
8094          * Ignore result of kauth_authorize_fileop call.
8095          */
8096         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
8097                                (uintptr_t)vp, 0);
8098
8099
8100         return 0;
8101 }
8102
8103 static int
8104 wait_for_namespace_event(namespace_handler_info_ext *nhi, nspace_type_t nspace_type)
8105 {
8106         int i, error=0, unblock=0;
8107         task_t curtask;
8108
8109         lck_mtx_lock(&nspace_handler_exclusion_lock);
8110         if (nspace_handlers[nspace_type].handler_busy) {
8111                 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8112                 return EBUSY;
8113         }
8114         nspace_handlers[nspace_type].handler_busy = 1;
8115         lck_mtx_unlock(&nspace_handler_exclusion_lock);
8116
8117         /*
8118          * Any process that gets here will be one of the namespace handlers.
8119          * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8120          * as we can cause deadlocks to occur, because the namespace handler may prevent
8121          * VNOP_INACTIVE from proceeding.  Mark the current task as a P_DEPENDENCY_CAPABLE
8122          * process.
8123          */
8124         curtask = current_task();
8125         bsd_set_dependency_capable (curtask);
8126
8127         lck_mtx_lock(&nspace_handler_lock);
8128         if (nspace_handlers[nspace_type].handler_proc == NULL) {
8129                 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
8130                 nspace_handlers[nspace_type].handler_proc = current_proc();
8131         }
8132
8133         while (error == 0) {
8134
8135                 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8136                         if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8137                                 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8138                                         continue;
8139                                 }
8140                                 break;
8141                         }
8142                 }
8143
8144                 if (i < MAX_NSPACE_ITEMS) {
8145                         nspace_items[i].flags  &= ~NSPACE_ITEM_NEW;
8146                         nspace_items[i].flags  |= NSPACE_ITEM_PROCESSING;
8147                         nspace_items[i].token  = ++nspace_token_id;
8148
8149                         if (nspace_items[i].vp) {
8150                                 struct fileproc *fp;
8151                                 int32_t indx, fmode;
8152                                 struct proc *p = current_proc();
8153                                 vfs_context_t ctx = vfs_context_current();
8154
8155                                 fmode = nspace_open_flags_for_type(nspace_type);
8156
8157                                 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
8158                                 if (error) {
8159                                         unblock = 1;
8160                                         break;
8161                                 }
8162                                 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
8163                                 if (error) {
8164                                         unblock = 1;
8165                                         vnode_put(nspace_items[i].vp);
8166                                         break;
8167                                 }
8168
8169                                 if ((error = falloc(p, &fp, &indx, ctx))) {
8170                                         vn_close(nspace_items[i].vp, fmode, ctx);
8171                                         vnode_put(nspace_items[i].vp);
8172                                         unblock = 1;
8173                                         break;
8174                                 }
8175
8176                                 fp->f_fglob->fg_flag = fmode;
8177                                 fp->f_fglob->fg_type = DTYPE_VNODE;
8178                                 fp->f_fglob->fg_ops = &vnops;
8179                                 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
8180
8181                                 proc_fdlock(p);
8182                                 procfdtbl_releasefd(p, indx, NULL);
8183                                 fp_drop(p, indx, fp, 1);
8184                                 proc_fdunlock(p);
8185
8186                                 error = copyout(&nspace_items[i].token, nhi->token, sizeof(uint32_t));
8187                                 error = copyout(&nspace_items[i].op, nhi->flags, sizeof(uint64_t));
8188                                 error = copyout(&indx, nhi->fdptr, sizeof(uint32_t));
8189                                 if (nhi->infoptr) {
8190                                         uio_t uio = (uio_t)nspace_items[i].arg;
8191                                         uint64_t u_offset, u_length;
8192
8193                                         if (uio) {
8194                                                 u_offset = uio_offset(uio);
8195                                                 u_length = uio_resid(uio);
8196                                         } else {
8197                                                 u_offset = 0;
8198                                                 u_length = 0;
8199                                         }
8200                                         error = copyout(&u_offset, nhi->infoptr, sizeof(uint64_t));
8201                                         error = copyout(&u_length, nhi->infoptr+sizeof(uint64_t), sizeof(uint64_t));
8202                                 }
8203                                 if (error) {
8204                                         vn_close(nspace_items[i].vp, fmode, ctx);
8205                                         fp_free(p, indx, fp);
8206                                         unblock = 1;
8207                                 }
8208
8209                                 vnode_put(nspace_items[i].vp);
8210
8211                                 break;
8212                         } else {
8213                                 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
8214                                        i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
8215                         }
8216
8217                 } else {
8218                         error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
8219                         if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8220                                 error = EINVAL;
8221                                 break;
8222                         }
8223
8224                 }
8225         }
8226
8227         if (unblock) {
8228                 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8229                         vnode_lock_spin(nspace_items[i].vp);
8230                         nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8231                         vnode_unlock(nspace_items[i].vp);
8232                 }
8233                 nspace_items[i].vp = NULL;
8234                 nspace_items[i].vid = 0;
8235                 nspace_items[i].flags = NSPACE_ITEM_DONE;
8236                 nspace_items[i].token = 0;
8237
8238                 wakeup((caddr_t)&(nspace_items[i].vp));
8239         }
8240
8241         if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
8242                 // just go through every snapshot event and unblock it immediately.
8243                 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8244                         for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8245                                 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8246                                         if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8247                                                 nspace_items[i].vp = NULL;
8248                                                 nspace_items[i].vid = 0;
8249                                                 nspace_items[i].flags = NSPACE_ITEM_DONE;
8250                                                 nspace_items[i].token = 0;
8251
8252                                                 wakeup((caddr_t)&(nspace_items[i].vp));
8253                                         }
8254                                 }
8255                         }
8256                 }
8257         }
8258
8259         lck_mtx_unlock(&nspace_handler_lock);
8260
8261         lck_mtx_lock(&nspace_handler_exclusion_lock);
8262         nspace_handlers[nspace_type].handler_busy = 0;
8263         lck_mtx_unlock(&nspace_handler_exclusion_lock);
8264
8265         return error;
8266 }
8267
8268
8269 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
8270 {
8271         int error = 0;
8272         namespace_handler_info_ext nhi;
8273
8274         if (nspace_type == NSPACE_HANDLER_SNAPSHOT && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8275                 return EINVAL;
8276         }
8277
8278         if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8279                 return error;
8280         }
8281
8282         if (   (is64bit && size != sizeof(user64_namespace_handler_info) && size != sizeof(user64_namespace_handler_info_ext))
8283             || (is64bit == 0 && size != sizeof(user32_namespace_handler_info) && size != sizeof(user32_namespace_handler_info_ext))) {
8284
8285                 // either you're 64-bit and passed a 64-bit struct or
8286                 // you're 32-bit and passed a 32-bit struct.  otherwise
8287                 // it's not ok.
8288                 return EINVAL;
8289         }
8290
8291         if (is64bit) {
8292                 nhi.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
8293                 nhi.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
8294                 nhi.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
8295                 if (size == sizeof(user64_namespace_handler_info_ext)) {
8296                         nhi.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
8297                 } else {
8298                         nhi.infoptr = 0;
8299                 }
8300         } else {
8301                 nhi.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
8302                 nhi.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
8303                 nhi.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
8304                 if (size == sizeof(user32_namespace_handler_info_ext)) {
8305                         nhi.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
8306                 } else {
8307                         nhi.infoptr = 0;
8308                 }
8309         }
8310
8311         return wait_for_namespace_event(&nhi, nspace_type);
8312 }
8313
8314 /*
8315  * Make a filesystem-specific control call:
8316  */
8317 /* ARGSUSED */
8318 static int
8319 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
8320 {
8321         int error=0;
8322         boolean_t is64bit;
8323         u_int size;
8324 #define STK_PARAMS 128
8325         char stkbuf[STK_PARAMS];
8326         caddr_t data, memp;
8327         vnode_t vp = *arg_vp;
8328
8329         size = IOCPARM_LEN(cmd);
8330         if (size > IOCPARM_MAX) return (EINVAL);
8331
8332         is64bit = proc_is64bit(p);
8333
8334         memp = NULL;
8335         if (size > sizeof (stkbuf)) {
8336                 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
8337                 data = memp;
8338         } else {
8339                 data = &stkbuf[0];
8340         };
8341
8342         if (cmd & IOC_IN) {
8343                 if (size) {
8344                         error = copyin(udata, data, size);
8345                         if (error) goto FSCtl_Exit;
8346                 } else {
8347                         if (is64bit) {
8348                                 *(user_addr_t *)data = udata;
8349                         }
8350                         else {
8351                                 *(uint32_t *)data = (uint32_t)udata;
8352                         }
8353                 };
8354         } else if ((cmd & IOC_OUT) && size) {
8355                 /*
8356                  * Zero the buffer so the user always
8357                  * gets back something deterministic.
8358                  */
8359                 bzero(data, size);
8360         } else if (cmd & IOC_VOID) {
8361                 if (is64bit) {
8362                         *(user_addr_t *)data = udata;
8363                 }
8364                 else {
8365                         *(uint32_t *)data = (uint32_t)udata;
8366                 }
8367         }
8368
8369         /* Check to see if it's a generic command */
8370         if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) {
8371                 mount_t mp = vp->v_mount;
8372                 int arg = *(uint32_t*)data;
8373
8374                 /* record vid of vp so we can drop it below. */
8375                 uint32_t vvid = vp->v_id;
8376
8377                 /*
8378                  * Then grab mount_iterref so that we can release the vnode.
8379                  * Without this, a thread may call vnode_iterate_prepare then
8380                  * get into a deadlock because we've never released the root vp
8381                  */
8382                 error = mount_iterref (mp, 0);
8383                 if (error)  {
8384                         goto FSCtl_Exit;
8385                 }
8386                 vnode_put(vp);
8387
8388                 /* issue the sync for this volume */
8389                 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
8390
8391                 /*
8392                  * Then release the mount_iterref once we're done syncing; it's not
8393                  * needed for the VNOP_IOCTL below
8394                  */
8395                 mount_iterdrop(mp);
8396
8397                 if (arg & FSCTL_SYNC_FULLSYNC) {
8398                         /* re-obtain vnode iocount on the root vp, if possible */
8399                         error = vnode_getwithvid (vp, vvid);
8400                         if (error == 0) {
8401                                 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
8402                                 vnode_put (vp);
8403                         }
8404                 }
8405                 /* mark the argument VP as having been released */
8406                 *arg_vp = NULL;
8407
8408         } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
8409                 user_addr_t ext_strings;
8410                 uint32_t    num_entries;
8411                 uint32_t    max_width;
8412
8413                 if (   (is64bit && size != sizeof(user64_package_ext_info))
8414                    || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
8415
8416                         // either you're 64-bit and passed a 64-bit struct or
8417                         // you're 32-bit and passed a 32-bit struct.  otherwise
8418                         // it's not ok.
8419                         error = EINVAL;
8420                         goto FSCtl_Exit;
8421                 }
8422
8423                 if (is64bit) {
8424                         ext_strings = ((user64_package_ext_info *)data)->strings;
8425                         num_entries = ((user64_package_ext_info *)data)->num_entries;
8426                         max_width   = ((user64_package_ext_info *)data)->max_width;
8427                 } else {
8428                         ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
8429                         num_entries = ((user32_package_ext_info *)data)->num_entries;
8430                         max_width   = ((user32_package_ext_info *)data)->max_width;
8431                 }
8432
8433                 error = set_package_extensions_table(ext_strings, num_entries, max_width);
8434
8435         } else if (IOCBASECMD(cmd) == FSCTL_WAIT_FOR_SYNC) {
8436                 error = tsleep((caddr_t)&sync_wait_time, PVFS|PCATCH, "sync-wait", 0);
8437                 if (error == 0) {
8438                         *(uint32_t *)data = (uint32_t)sync_wait_time;
8439                         error = 0;
8440                 } else {
8441                         error *= -1;
8442                 }
8443
8444         } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GET) {
8445                 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
8446         } else if (IOCBASECMD(cmd) == FSCTL_OLD_SNAPSHOT_HANDLER_GET) {
8447                 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
8448         } else if (IOCBASECMD(cmd) == FSCTL_SNAPSHOT_HANDLER_GET_EXT) {
8449                 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
8450         } else if (IOCBASECMD(cmd) == FSCTL_TRACKED_HANDLER_GET) {
8451                 error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data);
8452         } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UPDATE) {
8453                 uint32_t token, val;
8454                 int i;
8455
8456                 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8457                         goto FSCtl_Exit;
8458                 }
8459
8460                 if (!nspace_is_special_process(p)) {
8461                         error = EINVAL;
8462                         goto FSCtl_Exit;
8463                 }
8464
8465                 token = ((uint32_t *)data)[0];
8466                 val   = ((uint32_t *)data)[1];
8467
8468                 lck_mtx_lock(&nspace_handler_lock);
8469
8470                 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8471                         if (nspace_items[i].token == token) {
8472                                 break;
8473                         }
8474                 }
8475
8476                 if (i >= MAX_NSPACE_ITEMS) {
8477                         error = ENOENT;
8478                 } else {
8479                         //
8480                         // if this bit is set, when resolve_nspace_item() times out
8481                         // it will loop and go back to sleep.
8482                         //
8483                         nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
8484                 }
8485
8486                 lck_mtx_unlock(&nspace_handler_lock);
8487
8488                 if (error) {
8489                         printf("nspace-handler-update: did not find token %u\n", token);
8490                 }
8491
8492         } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UNBLOCK) {
8493                 uint32_t token, val;
8494                 int i;
8495
8496                 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8497                         goto FSCtl_Exit;
8498                 }
8499
8500                 if (!nspace_is_special_process(p)) {
8501                         error = EINVAL;
8502                         goto FSCtl_Exit;
8503                 }
8504
8505                 token = ((uint32_t *)data)[0];
8506                 val   = ((uint32_t *)data)[1];
8507
8508                 lck_mtx_lock(&nspace_handler_lock);
8509
8510                 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8511                         if (nspace_items[i].token == token) {
8512                                 break;
8513                         }
8514                 }
8515
8516                 if (i >= MAX_NSPACE_ITEMS) {
8517                         printf("nspace-handler-unblock: did not find token %u\n", token);
8518                         error = ENOENT;
8519                 } else {
8520                         if (val == 0 && nspace_items[i].vp) {
8521                                 vnode_lock_spin(nspace_items[i].vp);
8522                                 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8523                                 vnode_unlock(nspace_items[i].vp);
8524                         }
8525
8526                         nspace_items[i].vp = NULL;
8527                         nspace_items[i].arg = NULL;
8528                         nspace_items[i].op = 0;
8529                         nspace_items[i].vid = 0;
8530                         nspace_items[i].flags = NSPACE_ITEM_DONE;
8531                         nspace_items[i].token = 0;
8532
8533                         wakeup((caddr_t)&(nspace_items[i].vp));
8534                 }
8535
8536                 lck_mtx_unlock(&nspace_handler_lock);
8537
8538         } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_CANCEL) {
8539                 uint32_t token, val;
8540                 int i;
8541
8542                 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8543                         goto FSCtl_Exit;
8544                 }
8545
8546                 if (!nspace_is_special_process(p)) {
8547                         error = EINVAL;
8548                         goto FSCtl_Exit;
8549                 }
8550
8551                 token = ((uint32_t *)data)[0];
8552                 val   = ((uint32_t *)data)[1];
8553
8554                 lck_mtx_lock(&nspace_handler_lock);
8555
8556                 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8557                         if (nspace_items[i].token == token) {
8558                                 break;
8559                         }
8560                 }
8561
8562                 if (i >= MAX_NSPACE_ITEMS) {
8563                         printf("nspace-handler-cancel: did not find token %u\n", token);
8564                         error = ENOENT;
8565                 } else {
8566                         if (nspace_items[i].vp) {
8567                                 vnode_lock_spin(nspace_items[i].vp);
8568                                 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8569                                 vnode_unlock(nspace_items[i].vp);
8570                         }
8571
8572                         nspace_items[i].vp = NULL;
8573                         nspace_items[i].arg = NULL;
8574                         nspace_items[i].vid = 0;
8575                         nspace_items[i].token = val;
8576                         nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
8577                         nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
8578
8579                         wakeup((caddr_t)&(nspace_items[i].vp));
8580                 }
8581
8582                 lck_mtx_unlock(&nspace_handler_lock);
8583         } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME) {
8584                 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8585                         goto FSCtl_Exit;
8586                 }
8587
8588                 // we explicitly do not do the namespace_handler_proc check here
8589
8590                 lck_mtx_lock(&nspace_handler_lock);
8591                 snapshot_timestamp = ((uint32_t *)data)[0];
8592                 wakeup(&nspace_item_idx);
8593                 lck_mtx_unlock(&nspace_handler_lock);
8594                 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
8595
8596         } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS) {
8597                 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8598                         goto FSCtl_Exit;
8599                 }
8600
8601                 lck_mtx_lock(&nspace_handler_lock);
8602                 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
8603                 lck_mtx_unlock(&nspace_handler_lock);
8604                 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
8605                        nspace_allow_virtual_devs ? "" : " NOT");
8606                 error = 0;
8607
8608         } else if (IOCBASECMD(cmd) == FSCTL_SET_FSTYPENAME_OVERRIDE) {
8609                 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8610                         goto FSCtl_Exit;
8611                 }
8612                 if (vp->v_mount) {
8613                         mount_lock(vp->v_mount);
8614                         if (data[0] != 0) {
8615                                 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
8616                                 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
8617                                 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
8618                                         vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
8619                                         vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
8620                                 }
8621                         } else {
8622                                 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
8623                                         vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
8624                                 }
8625                                 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
8626                                 vp->v_mount->fstypename_override[0] = '\0';
8627                         }
8628                         mount_unlock(vp->v_mount);
8629                 }
8630         } else {
8631                 /* Invoke the filesystem-specific code */
8632                 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
8633         }
8634
8635
8636         /*
8637          * Copy any data to user, size was
8638          * already set and checked above.
8639          */
8640         if (error == 0 && (cmd & IOC_OUT) && size)
8641                 error = copyout(data, udata, size);
8642
8643 FSCtl_Exit:
8644         if (memp) kfree(memp, size);
8645
8646         return error;
8647 }
8648
8649 /* ARGSUSED */
8650 int
8651 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
8652 {
8653         int error;
8654         struct nameidata nd;
8655         u_long nameiflags;
8656         vnode_t vp = NULL;
8657         vfs_context_t ctx = vfs_context_current();
8658
8659         AUDIT_ARG(cmd, uap->cmd);
8660         AUDIT_ARG(value32, uap->options);
8661         /* Get the vnode for the file we are getting info on:  */
8662         nameiflags = 0;
8663         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8664         NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
8665                UIO_USERSPACE, uap->path, ctx);
8666         if ((error = namei(&nd))) goto done;
8667         vp = nd.ni_vp;
8668         nameidone(&nd);
8669
8670 #if CONFIG_MACF
8671         error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
8672         if (error) {
8673                 goto done;
8674         }
8675 #endif
8676
8677         error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
8678
8679 done:
8680         if (vp)
8681                 vnode_put(vp);
8682         return error;
8683 }
8684 /* ARGSUSED */
8685 int
8686 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
8687 {
8688         int error;
8689         vnode_t vp = NULL;
8690         vfs_context_t ctx = vfs_context_current();
8691         int fd = -1;
8692
8693         AUDIT_ARG(fd, uap->fd);
8694         AUDIT_ARG(cmd, uap->cmd);
8695         AUDIT_ARG(value32, uap->options);
8696
8697         /* Get the vnode for the file we are getting info on:  */
8698         if ((error = file_vnode(uap->fd, &vp)))
8699                 goto done;
8700         fd = uap->fd;
8701         if ((error = vnode_getwithref(vp))) {
8702                 goto done;
8703         }
8704
8705 #if CONFIG_MACF
8706         error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
8707         if (error) {
8708                 goto done;
8709         }
8710 #endif
8711
8712         error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
8713
8714 done:
8715         if (fd != -1)
8716                 file_drop(fd);
8717
8718         if (vp)
8719                 vnode_put(vp);
8720         return error;
8721 }
8722 /* end of fsctl system call */
8723
8724 /*
8725  * An in-kernel sync for power management to call.
8726  */
8727 __private_extern__ int
8728 sync_internal(void)
8729 {
8730         int error;
8731
8732         struct sync_args data;
8733
8734         int retval[2];
8735
8736
8737         error = sync(current_proc(), &data, &retval[0]);
8738
8739
8740         return (error);
8741 } /* end of sync_internal call */
8742
8743
8744 /*
8745  *  Retrieve the data of an extended attribute.
8746  */
8747 int
8748 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
8749 {
8750         vnode_t vp;
8751         struct nameidata nd;
8752         char attrname[XATTR_MAXNAMELEN+1];
8753         vfs_context_t ctx = vfs_context_current();
8754         uio_t auio = NULL;
8755         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8756         size_t attrsize = 0;
8757         size_t namelen;
8758         u_int32_t nameiflags;
8759         int error;
8760         char uio_buf[ UIO_SIZEOF(1) ];
8761
8762         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
8763                 return (EINVAL);
8764
8765         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
8766         NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
8767         if ((error = namei(&nd))) {
8768                 return (error);
8769         }
8770         vp = nd.ni_vp;
8771         nameidone(&nd);
8772
8773         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
8774                 goto out;
8775         }
8776         if (xattr_protected(attrname)) {
8777                 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
8778                         error = EPERM;
8779                         goto out;
8780                 }
8781         }
8782         /*
8783          * the specific check for 0xffffffff is a hack to preserve
8784          * binaray compatibilty in K64 with applications that discovered
8785          * that passing in a buf pointer and a size of -1 resulted in
8786          * just the size of the indicated extended attribute being returned.
8787          * this isn't part of the documented behavior, but because of the
8788          * original implemtation's check for "uap->size > 0", this behavior
8789          * was allowed. In K32 that check turned into a signed comparison
8790          * even though uap->size is unsigned...  in K64, we blow by that
8791          * check because uap->size is unsigned and doesn't get sign smeared
8792          * in the munger for a 32 bit user app.  we also need to add a
8793          * check to limit the maximum size of the buffer being passed in...
8794          * unfortunately, the underlying fileystems seem to just malloc
8795          * the requested size even if the actual extended attribute is tiny.
8796          * because that malloc is for kernel wired memory, we have to put a
8797          * sane limit on it.
8798          *
8799          * U32 running on K64 will yield 0x00000000ffffffff for uap->size
8800          * U64 running on K64 will yield -1 (64 bits wide)
8801          * U32/U64 running on K32 will yield -1 (32 bits wide)
8802          */
8803         if (uap->size == 0xffffffff || uap->size == (size_t)-1)
8804                 goto no_uio;
8805
8806         if (uap->value) {
8807                 if (uap->size > (size_t)XATTR_MAXSIZE)
8808                         uap->size = XATTR_MAXSIZE;
8809
8810                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
8811                                             &uio_buf[0], sizeof(uio_buf));
8812                 uio_addiov(auio, uap->value, uap->size);
8813         }
8814 no_uio:
8815         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
8816 out:
8817         vnode_put(vp);
8818
8819         if (auio) {
8820                 *retval = uap->size - uio_resid(auio);
8821         } else {
8822                 *retval = (user_ssize_t)attrsize;
8823         }
8824
8825         return (error);
8826 }
8827
8828 /*
8829  * Retrieve the data of an extended attribute.
8830  */
8831 int
8832 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
8833 {
8834         vnode_t vp;
8835         char attrname[XATTR_MAXNAMELEN+1];
8836         uio_t auio = NULL;
8837         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8838         size_t attrsize = 0;
8839         size_t namelen;
8840         int error;
8841         char uio_buf[ UIO_SIZEOF(1) ];
8842
8843         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
8844                 return (EINVAL);
8845
8846         if ( (error = file_vnode(uap->fd, &vp)) ) {
8847                 return (error);
8848         }
8849         if ( (error = vnode_getwithref(vp)) ) {
8850                 file_drop(uap->fd);
8851                 return(error);
8852         }
8853         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
8854                 goto out;
8855         }
8856         if (xattr_protected(attrname)) {
8857                 error = EPERM;
8858                 goto out;
8859         }
8860         if (uap->value && uap->size > 0) {
8861                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
8862                                             &uio_buf[0], sizeof(uio_buf));
8863                 uio_addiov(auio, uap->value, uap->size);
8864         }
8865
8866         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
8867 out:
8868         (void)vnode_put(vp);
8869         file_drop(uap->fd);
8870
8871         if (auio) {
8872                 *retval = uap->size - uio_resid(auio);
8873         } else {
8874                 *retval = (user_ssize_t)attrsize;
8875         }
8876         return (error);
8877 }
8878
8879 /*
8880  * Set the data of an extended attribute.
8881  */
8882 int
8883 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
8884 {
8885         vnode_t vp;
8886         struct nameidata nd;
8887         char attrname[XATTR_MAXNAMELEN+1];
8888         vfs_context_t ctx = vfs_context_current();
8889         uio_t auio = NULL;
8890         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8891         size_t namelen;
8892         u_int32_t nameiflags;
8893         int error;
8894         char uio_buf[ UIO_SIZEOF(1) ];
8895
8896         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
8897                 return (EINVAL);
8898
8899         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
8900                 if (error == EPERM) {
8901                         /* if the string won't fit in attrname, copyinstr emits EPERM */
8902                         return (ENAMETOOLONG);
8903                 }
8904                 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
8905                 return error;
8906         }
8907         if (xattr_protected(attrname))
8908                 return(EPERM);
8909         if (uap->size != 0 && uap->value == 0) {
8910                 return (EINVAL);
8911         }
8912
8913         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
8914         NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
8915         if ((error = namei(&nd))) {
8916                 return (error);
8917         }
8918         vp = nd.ni_vp;
8919         nameidone(&nd);
8920
8921         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
8922                                     &uio_buf[0], sizeof(uio_buf));
8923         uio_addiov(auio, uap->value, uap->size);
8924
8925         error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
8926 #if CONFIG_FSE
8927         if (error == 0) {
8928                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
8929                     FSE_ARG_VNODE, vp,
8930                     FSE_ARG_DONE);
8931         }
8932 #endif
8933         vnode_put(vp);
8934         *retval = 0;
8935         return (error);
8936 }
8937
8938 /*
8939  * Set the data of an extended attribute.
8940  */
8941 int
8942 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
8943 {
8944         vnode_t vp;
8945         char attrname[XATTR_MAXNAMELEN+1];
8946         uio_t auio = NULL;
8947         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8948         size_t namelen;
8949         int error;
8950         char uio_buf[ UIO_SIZEOF(1) ];
8951 #if CONFIG_FSE
8952         vfs_context_t ctx = vfs_context_current();
8953 #endif
8954
8955         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
8956                 return (EINVAL);
8957
8958         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
8959                 return (error);
8960         }
8961         if (xattr_protected(attrname))
8962                 return(EPERM);
8963         if (uap->size != 0 && uap->value == 0) {
8964                 return (EINVAL);
8965         }
8966         if ( (error = file_vnode(uap->fd, &vp)) ) {
8967                 return (error);
8968         }
8969         if ( (error = vnode_getwithref(vp)) ) {
8970                 file_drop(uap->fd);
8971                 return(error);
8972         }
8973         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
8974                                     &uio_buf[0], sizeof(uio_buf));
8975         uio_addiov(auio, uap->value, uap->size);
8976
8977         error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
8978 #if CONFIG_FSE
8979         if (error == 0) {
8980                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
8981                     FSE_ARG_VNODE, vp,
8982                     FSE_ARG_DONE);
8983         }
8984 #endif
8985         vnode_put(vp);
8986         file_drop(uap->fd);
8987         *retval = 0;
8988         return (error);
8989 }
8990
8991 /*
8992  * Remove an extended attribute.
8993  * XXX Code duplication here.
8994  */
8995 int
8996 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
8997 {
8998         vnode_t vp;
8999         struct nameidata nd;
9000         char attrname[XATTR_MAXNAMELEN+1];
9001         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9002         vfs_context_t ctx = vfs_context_current();
9003         size_t namelen;
9004         u_int32_t nameiflags;
9005         int error;
9006
9007         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9008                 return (EINVAL);
9009
9010         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9011         if (error != 0) {
9012                 return (error);
9013         }
9014         if (xattr_protected(attrname))
9015                 return(EPERM);
9016         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9017         NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
9018         if ((error = namei(&nd))) {
9019                 return (error);
9020         }
9021         vp = nd.ni_vp;
9022         nameidone(&nd);
9023
9024         error = vn_removexattr(vp, attrname, uap->options, ctx);
9025 #if CONFIG_FSE
9026         if (error == 0) {
9027                 add_fsevent(FSE_XATTR_REMOVED, ctx,
9028                     FSE_ARG_VNODE, vp,
9029                     FSE_ARG_DONE);
9030         }
9031 #endif
9032         vnode_put(vp);
9033         *retval = 0;
9034         return (error);
9035 }
9036
9037 /*
9038  * Remove an extended attribute.
9039  * XXX Code duplication here.
9040  */
9041 int
9042 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
9043 {
9044         vnode_t vp;
9045         char attrname[XATTR_MAXNAMELEN+1];
9046         size_t namelen;
9047         int error;
9048 #if CONFIG_FSE
9049         vfs_context_t ctx = vfs_context_current();
9050 #endif
9051
9052         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9053                 return (EINVAL);
9054
9055         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9056         if (error != 0) {
9057                 return (error);
9058         }
9059         if (xattr_protected(attrname))
9060                 return(EPERM);
9061         if ( (error = file_vnode(uap->fd, &vp)) ) {
9062                 return (error);
9063         }
9064         if ( (error = vnode_getwithref(vp)) ) {
9065                 file_drop(uap->fd);
9066                 return(error);
9067         }
9068
9069         error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
9070 #if CONFIG_FSE
9071         if (error == 0) {
9072                 add_fsevent(FSE_XATTR_REMOVED, ctx,
9073                     FSE_ARG_VNODE, vp,
9074                     FSE_ARG_DONE);
9075         }
9076 #endif
9077         vnode_put(vp);
9078         file_drop(uap->fd);
9079         *retval = 0;
9080         return (error);
9081 }
9082
9083 /*
9084  * Retrieve the list of extended attribute names.
9085  * XXX Code duplication here.
9086  */
9087 int
9088 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
9089 {
9090         vnode_t vp;
9091         struct nameidata nd;
9092         vfs_context_t ctx = vfs_context_current();
9093         uio_t auio = NULL;
9094         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9095         size_t attrsize = 0;
9096         u_int32_t nameiflags;
9097         int error;
9098         char uio_buf[ UIO_SIZEOF(1) ];
9099
9100         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9101                 return (EINVAL);
9102
9103         nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
9104         NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
9105         if ((error = namei(&nd))) {
9106                 return (error);
9107         }
9108         vp = nd.ni_vp;
9109         nameidone(&nd);
9110         if (uap->namebuf != 0 && uap->bufsize > 0) {
9111                 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
9112                                             &uio_buf[0], sizeof(uio_buf));
9113                 uio_addiov(auio, uap->namebuf, uap->bufsize);
9114         }
9115
9116         error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
9117
9118         vnode_put(vp);
9119         if (auio) {
9120                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
9121         } else {
9122                 *retval = (user_ssize_t)attrsize;
9123         }
9124         return (error);
9125 }
9126
9127 /*
9128  * Retrieve the list of extended attribute names.
9129  * XXX Code duplication here.
9130  */
9131 int
9132 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
9133 {
9134         vnode_t vp;
9135         uio_t auio = NULL;
9136         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9137         size_t attrsize = 0;
9138         int error;
9139         char uio_buf[ UIO_SIZEOF(1) ];
9140
9141         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9142                 return (EINVAL);
9143
9144         if ( (error = file_vnode(uap->fd, &vp)) ) {
9145                 return (error);
9146         }
9147         if ( (error = vnode_getwithref(vp)) ) {
9148                 file_drop(uap->fd);
9149                 return(error);
9150         }
9151         if (uap->namebuf != 0 && uap->bufsize > 0) {
9152                 auio = uio_createwithbuffer(1, 0, spacetype,
9153                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
9154                 uio_addiov(auio, uap->namebuf, uap->bufsize);
9155         }
9156
9157         error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
9158
9159         vnode_put(vp);
9160         file_drop(uap->fd);
9161         if (auio) {
9162                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
9163         } else {
9164                 *retval = (user_ssize_t)attrsize;
9165         }
9166         return (error);
9167 }
9168
9169 /*
9170  * Obtain the full pathname of a file system object by id.
9171  *
9172  * This is a private SPI used by the File Manager.
9173  */
9174 __private_extern__
9175 int
9176 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
9177 {
9178         vnode_t vp;
9179         struct mount *mp = NULL;
9180         vfs_context_t ctx = vfs_context_current();
9181         fsid_t fsid;
9182         char *realpath;
9183         int bpflags;
9184         int length;
9185         int error;
9186
9187         if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
9188                 return (error);
9189         }
9190         AUDIT_ARG(value32, fsid.val[0]);
9191         AUDIT_ARG(value64, uap->objid);
9192         /* Restrict output buffer size for now. */
9193         if (uap->bufsize > PAGE_SIZE) {
9194                 return (EINVAL);
9195         }
9196         MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
9197         if (realpath == NULL) {
9198                 return (ENOMEM);
9199         }
9200         /* Find the target mountpoint. */
9201         if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) {
9202                 error = ENOTSUP;  /* unexpected failure */
9203                 goto out;
9204         }
9205         /* Find the target vnode. */
9206         if (uap->objid == 2) {
9207                 error = VFS_ROOT(mp, &vp, ctx);
9208         } else {
9209                 error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx);
9210         }
9211         vfs_unbusy(mp);
9212         if (error) {
9213                 goto out;
9214         }
9215 #if CONFIG_MACF
9216         error = mac_vnode_check_fsgetpath(ctx, vp);
9217         if (error) {
9218                 vnode_put(vp);
9219                 goto out;
9220         }
9221 #endif
9222         /* Obtain the absolute path to this vnode. */
9223         bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
9224         error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
9225         vnode_put(vp);
9226         if (error) {
9227                 goto out;
9228         }
9229         AUDIT_ARG(text, realpath);
9230         error = copyout((caddr_t)realpath, uap->buf, length);
9231
9232         *retval = (user_ssize_t)length; /* may be superseded by error */
9233 out:
9234         if (realpath) {
9235                 FREE(realpath, M_TEMP);
9236         }
9237         return (error);
9238 }
9239
9240 /*
9241  * Common routine to handle various flavors of statfs data heading out
9242  *      to user space.
9243  *
9244  * Returns:     0                       Success
9245  *              EFAULT
9246  */
9247 static int
9248 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
9249     user_addr_t bufp, int *sizep, boolean_t is_64_bit,
9250     boolean_t partial_copy)
9251 {
9252         int             error;
9253         int             my_size, copy_size;
9254
9255         if (is_64_bit) {
9256                 struct user64_statfs sfs;
9257                 my_size = copy_size = sizeof(sfs);
9258                 bzero(&sfs, my_size);
9259                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9260                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
9261                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
9262                 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
9263                 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
9264                 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
9265                 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
9266                 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
9267                 sfs.f_files = (user64_long_t)sfsp->f_files;
9268                 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
9269                 sfs.f_fsid = sfsp->f_fsid;
9270                 sfs.f_owner = sfsp->f_owner;
9271                 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
9272                         strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
9273                 } else {
9274                         strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
9275                 }
9276                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
9277                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
9278
9279                 if (partial_copy) {
9280                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
9281                 }
9282                 error = copyout((caddr_t)&sfs, bufp, copy_size);
9283         }
9284         else {
9285                 struct user32_statfs sfs;
9286
9287                 my_size = copy_size = sizeof(sfs);
9288                 bzero(&sfs, my_size);
9289
9290                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9291                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
9292                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
9293
9294                 /*
9295                  * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
9296                  * have to fudge the numbers here in that case.   We inflate the blocksize in order
9297                  * to reflect the filesystem size as best we can.
9298                  */
9299                 if ((sfsp->f_blocks > INT_MAX)
9300                         /* Hack for 4061702 . I think the real fix is for Carbon to
9301                          * look for some volume capability and not depend on hidden
9302                          * semantics agreed between a FS and carbon.
9303                          * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
9304                          * for Carbon to set bNoVolumeSizes volume attribute.
9305                          * Without this the webdavfs files cannot be copied onto
9306                          * disk as they look huge. This change should not affect
9307                          * XSAN as they should not setting these to -1..
9308                          */
9309                          && (sfsp->f_blocks != 0xffffffffffffffffULL)
9310                          && (sfsp->f_bfree != 0xffffffffffffffffULL)
9311                          && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
9312                         int             shift;
9313
9314                         /*
9315                          * Work out how far we have to shift the block count down to make it fit.
9316                          * Note that it's possible to have to shift so far that the resulting
9317                          * blocksize would be unreportably large.  At that point, we will clip
9318                          * any values that don't fit.
9319                          *
9320                          * For safety's sake, we also ensure that f_iosize is never reported as
9321                          * being smaller than f_bsize.
9322                          */
9323                         for (shift = 0; shift < 32; shift++) {
9324                                 if ((sfsp->f_blocks >> shift) <= INT_MAX)
9325                                         break;
9326                                 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
9327                                         break;
9328                         }
9329 #define __SHIFT_OR_CLIP(x, s)   ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
9330                         sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
9331                         sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
9332                         sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
9333 #undef __SHIFT_OR_CLIP
9334                         sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
9335                         sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
9336                 } else {
9337                         /* filesystem is small enough to be reported honestly */
9338                         sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
9339                         sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
9340                         sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
9341                         sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
9342                         sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
9343                 }
9344                 sfs.f_files = (user32_long_t)sfsp->f_files;
9345                 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
9346                 sfs.f_fsid = sfsp->f_fsid;
9347                 sfs.f_owner = sfsp->f_owner;
9348                 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
9349                         strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
9350                 } else {
9351                         strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
9352                 }
9353                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
9354                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
9355
9356                 if (partial_copy) {
9357                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
9358                 }
9359                 error = copyout((caddr_t)&sfs, bufp, copy_size);
9360         }
9361
9362         if (sizep != NULL) {
9363                 *sizep = my_size;
9364         }
9365         return(error);
9366 }
9367
9368 /*
9369  * copy stat structure into user_stat structure.
9370  */
9371 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
9372 {
9373         bzero(usbp, sizeof(*usbp));
9374
9375         usbp->st_dev = sbp->st_dev;
9376         usbp->st_ino = sbp->st_ino;
9377         usbp->st_mode = sbp->st_mode;
9378         usbp->st_nlink = sbp->st_nlink;
9379         usbp->st_uid = sbp->st_uid;
9380         usbp->st_gid = sbp->st_gid;
9381         usbp->st_rdev = sbp->st_rdev;
9382 #ifndef _POSIX_C_SOURCE
9383         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9384         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9385         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9386         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9387         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9388         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9389 #else
9390         usbp->st_atime = sbp->st_atime;
9391         usbp->st_atimensec = sbp->st_atimensec;
9392         usbp->st_mtime = sbp->st_mtime;
9393         usbp->st_mtimensec = sbp->st_mtimensec;
9394         usbp->st_ctime = sbp->st_ctime;
9395         usbp->st_ctimensec = sbp->st_ctimensec;
9396 #endif
9397         usbp->st_size = sbp->st_size;
9398         usbp->st_blocks = sbp->st_blocks;
9399         usbp->st_blksize = sbp->st_blksize;
9400         usbp->st_flags = sbp->st_flags;
9401         usbp->st_gen = sbp->st_gen;
9402         usbp->st_lspare = sbp->st_lspare;
9403         usbp->st_qspare[0] = sbp->st_qspare[0];
9404         usbp->st_qspare[1] = sbp->st_qspare[1];
9405 }
9406
9407 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
9408 {
9409         bzero(usbp, sizeof(*usbp));
9410
9411         usbp->st_dev = sbp->st_dev;
9412         usbp->st_ino = sbp->st_ino;
9413         usbp->st_mode = sbp->st_mode;
9414         usbp->st_nlink = sbp->st_nlink;
9415         usbp->st_uid = sbp->st_uid;
9416         usbp->st_gid = sbp->st_gid;
9417         usbp->st_rdev = sbp->st_rdev;
9418 #ifndef _POSIX_C_SOURCE
9419         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9420         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9421         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9422         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9423         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9424         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9425 #else
9426         usbp->st_atime = sbp->st_atime;
9427         usbp->st_atimensec = sbp->st_atimensec;
9428         usbp->st_mtime = sbp->st_mtime;
9429         usbp->st_mtimensec = sbp->st_mtimensec;
9430         usbp->st_ctime = sbp->st_ctime;
9431         usbp->st_ctimensec = sbp->st_ctimensec;
9432 #endif
9433         usbp->st_size = sbp->st_size;
9434         usbp->st_blocks = sbp->st_blocks;
9435         usbp->st_blksize = sbp->st_blksize;
9436         usbp->st_flags = sbp->st_flags;
9437         usbp->st_gen = sbp->st_gen;
9438         usbp->st_lspare = sbp->st_lspare;
9439         usbp->st_qspare[0] = sbp->st_qspare[0];
9440         usbp->st_qspare[1] = sbp->st_qspare[1];
9441 }
9442
9443 /*
9444  * copy stat64 structure into user_stat64 structure.
9445  */
9446 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
9447 {
9448         bzero(usbp, sizeof(*usbp));
9449
9450         usbp->st_dev = sbp->st_dev;
9451         usbp->st_ino = sbp->st_ino;
9452         usbp->st_mode = sbp->st_mode;
9453         usbp->st_nlink = sbp->st_nlink;
9454         usbp->st_uid = sbp->st_uid;
9455         usbp->st_gid = sbp->st_gid;
9456         usbp->st_rdev = sbp->st_rdev;
9457 #ifndef _POSIX_C_SOURCE
9458         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9459         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9460         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9461         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9462         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9463         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9464         usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
9465         usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
9466 #else
9467         usbp->st_atime = sbp->st_atime;
9468         usbp->st_atimensec = sbp->st_atimensec;
9469         usbp->st_mtime = sbp->st_mtime;
9470         usbp->st_mtimensec = sbp->st_mtimensec;
9471         usbp->st_ctime = sbp->st_ctime;
9472         usbp->st_ctimensec = sbp->st_ctimensec;
9473         usbp->st_birthtime = sbp->st_birthtime;
9474         usbp->st_birthtimensec = sbp->st_birthtimensec;
9475 #endif
9476         usbp->st_size = sbp->st_size;
9477         usbp->st_blocks = sbp->st_blocks;
9478         usbp->st_blksize = sbp->st_blksize;
9479         usbp->st_flags = sbp->st_flags;
9480         usbp->st_gen = sbp->st_gen;
9481         usbp->st_lspare = sbp->st_lspare;
9482         usbp->st_qspare[0] = sbp->st_qspare[0];
9483         usbp->st_qspare[1] = sbp->st_qspare[1];
9484 }
9485
9486 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
9487 {
9488         bzero(usbp, sizeof(*usbp));
9489
9490         usbp->st_dev = sbp->st_dev;
9491         usbp->st_ino = sbp->st_ino;
9492         usbp->st_mode = sbp->st_mode;
9493         usbp->st_nlink = sbp->st_nlink;
9494         usbp->st_uid = sbp->st_uid;
9495         usbp->st_gid = sbp->st_gid;
9496         usbp->st_rdev = sbp->st_rdev;
9497 #ifndef _POSIX_C_SOURCE
9498         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9499         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9500         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9501         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9502         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9503         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9504         usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
9505         usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
9506 #else
9507         usbp->st_atime = sbp->st_atime;
9508         usbp->st_atimensec = sbp->st_atimensec;
9509         usbp->st_mtime = sbp->st_mtime;
9510         usbp->st_mtimensec = sbp->st_mtimensec;
9511         usbp->st_ctime = sbp->st_ctime;
9512         usbp->st_ctimensec = sbp->st_ctimensec;
9513         usbp->st_birthtime = sbp->st_birthtime;
9514         usbp->st_birthtimensec = sbp->st_birthtimensec;
9515 #endif
9516         usbp->st_size = sbp->st_size;
9517         usbp->st_blocks = sbp->st_blocks;
9518         usbp->st_blksize = sbp->st_blksize;
9519         usbp->st_flags = sbp->st_flags;
9520         usbp->st_gen = sbp->st_gen;
9521         usbp->st_lspare = sbp->st_lspare;
9522         usbp->st_qspare[0] = sbp->st_qspare[0];
9523         usbp->st_qspare[1] = sbp->st_qspare[1];
9524 }