apple/xnu.git — bsd/vfs/vfs_syscalls.c (xnu-2422.110.17)
1 /*
2 * Copyright (c) 1995-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <machine/cons.h>
103 #include <machine/limits.h>
104 #include <miscfs/specfs/specdev.h>
105
106 #include <security/audit/audit.h>
107 #include <bsm/audit_kevents.h>
108
109 #include <mach/mach_types.h>
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/task.h>
113
114 #include <vm/vm_pageout.h>
115
116 #include <libkern/OSAtomic.h>
117 #include <pexpert/pexpert.h>
118
119 #if CONFIG_MACF
120 #include <security/mac.h>
121 #include <security/mac_framework.h>
122 #endif
123
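/*
 * Path-buffer helpers: with CONFIG_FSE the buffers come from the fsevents
 * path-buffer pool; otherwise they fall back to MAXPATHLEN allocations
 * from the M_NAMEI zone.
 */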
124 #if CONFIG_FSE
125 #define GET_PATH(x) \
126 (x) = get_pathbuff();
127 #define RELEASE_PATH(x) \
128 release_pathbuff(x);
129 #else
130 #define GET_PATH(x) \
131 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
132 #define RELEASE_PATH(x) \
133 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
134 #endif /* CONFIG_FSE */
135
136 /* struct for checkdirs iteration */
137 struct cdirargs {
138 vnode_t olddp;
139 vnode_t newdp;
140 };
141 /* callback for checkdirs iteration */
142 static int checkdirs_callback(proc_t p, void * arg);
143
144 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
145 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
146 void enablequotas(struct mount *mp, vfs_context_t ctx);
147 static int getfsstat_callback(mount_t mp, void * arg);
148 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
149 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
150 static int sync_callback(mount_t, void *);
151 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
152 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
153 boolean_t partial_copy);
154 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
155 user_addr_t bufp);
156 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
157 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
158 struct componentname *cnp, user_addr_t fsmountargs,
159 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
160 vfs_context_t ctx);
161 void vfs_notify_mount(vnode_t pdvp);
162
163 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
164
165 #ifdef CONFIG_IMGSRC_ACCESS
166 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
167 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
168 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
169 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
170 static void mount_end_update(mount_t mp);
171 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
172 #endif /* CONFIG_IMGSRC_ACCESS */
173
174 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
175
176 __private_extern__
177 int sync_internal(void);
178
179 __private_extern__
180 int unlink1(vfs_context_t, struct nameidata *, int);
181
182 /*
183 * incremented each time a mount or unmount operation occurs;
184 * used to invalidate the cached value of the rootvp in the
185 * mount structure utilized by cache_lookup_path
186 */
187 uint32_t mount_generation = 0;
188
189 /* counts number of mount and unmount operations */
190 unsigned int vfs_nummntops=0;
191
192 extern const struct fileops vnops;
193 #if CONFIG_APPLEDOUBLE
194 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
195 #endif /* CONFIG_APPLEDOUBLE */
196
197 /*
198 * Virtual File System System Calls
199 */
200
201 #if NFSCLIENT
202 /*
203 * Private in-kernel mounting SPI (NFS only, not exported)
204 */
205 __private_extern__
206 boolean_t
207 vfs_iskernelmount(mount_t mp)
208 {
209 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
210 }
211
212 __private_extern__
213 int
214 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
215 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
216 {
217 struct nameidata nd;
218 boolean_t did_namei;
219 int error;
220
221 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
222 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
223
224 /*
225 * Get the vnode to be covered if it's not supplied
226 */
227 if (vp == NULLVP) {
228 error = namei(&nd);
229 if (error)
230 return (error);
231 vp = nd.ni_vp;
232 pvp = nd.ni_dvp;
233 did_namei = TRUE;
234 } else {
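/*
 * The caller supplied the covered vnode (and its parent); just fill in
 * the component name from the kernel path so mount_common() can record
 * it as f_mntonname.
 */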
235 char *pnbuf = CAST_DOWN(char *, path);
236
237 nd.ni_cnd.cn_pnbuf = pnbuf;
238 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
239 did_namei = FALSE;
240 }
241
242 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
243 syscall_flags, kern_flags, NULL, TRUE, ctx);
244
245 if (did_namei) {
246 vnode_put(vp);
247 vnode_put(pvp);
248 nameidone(&nd);
249 }
250
251 return (error);
252 }
253 #endif /* NFSCLIENT */
254
255 /*
256 * Mount a file system.
257 */
258 /* ARGSUSED */
259 int
260 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
261 {
262 struct __mac_mount_args muap;
263
264 muap.type = uap->type;
265 muap.path = uap->path;
266 muap.flags = uap->flags;
267 muap.data = uap->data;
268 muap.mac_p = USER_ADDR_NULL;
269 return (__mac_mount(p, &muap, retval));
270 }
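/*
 * Illustrative sketch: a userspace caller reaches the wrapper above through
 * the mount(2) libc stub. The filesystem type and mount point below are
 * placeholders, and the data argument is filesystem-specific, so treat this
 * as a hedged example rather than a recipe for any particular filesystem:
 *
 *	#include <stdio.h>
 *	#include <sys/mount.h>
 *
 *	// hypothetical: read-only mount with no fs-specific arguments
 *	if (mount("somefs", "/mnt/point", MNT_RDONLY, NULL) == -1)
 *		perror("mount");
 */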
271
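/*
 * Notify interested parties that a mount has appeared: broadcast a
 * VQ_MOUNT event on the VFS event queue and post NOTE_WRITE on the
 * parent of the covered vnode so directory watchers see the change.
 */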
272 void
273 vfs_notify_mount(vnode_t pdvp)
274 {
275 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
276 lock_vnode_and_post(pdvp, NOTE_WRITE);
277 }
278
279 /*
280 * __mac_mount:
281 * Mount a file system taking into account MAC label behavior.
282 * See mount(2) man page for more information
283 *
284 * Parameters: p Process requesting the mount
285 * uap User argument descriptor (see below)
286 * retval (ignored)
287 *
288 * Indirect: uap->type Filesystem type
289 * uap->path Path to mount
290 * uap->data Mount arguments
291 * uap->mac_p MAC info
292 * uap->flags Mount flags
293 *
294 *
295 * Returns: 0 Success
296 * !0 Not success
297 */
298 boolean_t root_fs_upgrade_try = FALSE;
299
300 int
301 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
302 {
303 vnode_t pvp = NULL;
304 vnode_t vp = NULL;
305 int need_nameidone = 0;
306 vfs_context_t ctx = vfs_context_current();
307 char fstypename[MFSNAMELEN];
308 struct nameidata nd;
309 size_t dummy=0;
310 char *labelstr = NULL;
311 int flags = uap->flags;
312 int error;
313 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
314 boolean_t is_64bit = IS_64BIT_PROCESS(p);
315 #else
316 #pragma unused(p)
317 #endif
318 /*
319 * Get the fs type name from user space
320 */
321 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
322 if (error)
323 return (error);
324
325 /*
326 * Get the vnode to be covered
327 */
328 NDINIT(&nd, LOOKUP, OP_MOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT,
329 UIO_USERSPACE, uap->path, ctx);
330 error = namei(&nd);
331 if (error) {
332 goto out;
333 }
334 need_nameidone = 1;
335 vp = nd.ni_vp;
336 pvp = nd.ni_dvp;
337
338 #ifdef CONFIG_IMGSRC_ACCESS
339 /* Mounting image source cannot be batched with other operations */
340 if (flags == MNT_IMGSRC_BY_INDEX) {
341 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
342 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
343 goto out;
344 }
345 #endif /* CONFIG_IMGSRC_ACCESS */
346
347 #if CONFIG_MACF
348 /*
349 * Get the label string (if any) from user space
350 */
351 if (uap->mac_p != USER_ADDR_NULL) {
352 struct user_mac mac;
353 size_t ulen = 0;
354
355 if (is_64bit) {
356 struct user64_mac mac64;
357 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
358 mac.m_buflen = mac64.m_buflen;
359 mac.m_string = mac64.m_string;
360 } else {
361 struct user32_mac mac32;
362 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
363 mac.m_buflen = mac32.m_buflen;
364 mac.m_string = mac32.m_string;
365 }
366 if (error)
367 goto out;
368 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
369 (mac.m_buflen < 2)) {
370 error = EINVAL;
371 goto out;
372 }
373 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
374 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
375 if (error) {
376 goto out;
377 }
378 AUDIT_ARG(mac_string, labelstr);
379 }
380 #endif /* CONFIG_MACF */
381
382 AUDIT_ARG(fflags, flags);
383
384 if ((vp->v_flag & VROOT) &&
385 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
386 if (!(flags & MNT_UNION)) {
387 flags |= MNT_UPDATE;
388 }
389 else {
390 /*
391 * For a union mount on '/', treat it as fresh
392 * mount instead of update.
393 * Otherwise, union mounting on '/' used to panic the
394 * system, because mnt_vnodecovered is NULL for '/',
395 * and unionlookup requires it after getting ENOENT
396 * on the union mount.
397 */
398 flags = (flags & ~(MNT_UPDATE));
399 }
400
401 #if 0
402 //#ifdef SECURE_KERNEL
403 if ((flags & MNT_RDONLY) == 0) {
404 /* Release kernels are not allowed to mount "/" as rw */
405 error = EPERM;
406 goto out;
407 }
408 //#endif
409 #endif
410 /*
411 * See 7392553 for more details on why this check exists.
412 * Suffice it to say: if this check is ON and something tries
413 * to mount the rootFS RW, we'll turn off the codesign
414 * bitmap optimization.
415 */
416 #if CHECK_CS_VALIDATION_BITMAP
417 if ((flags & MNT_RDONLY) == 0 ) {
418 root_fs_upgrade_try = TRUE;
419 }
420 #endif
421 }
422
423 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
424 labelstr, FALSE, ctx);
425
426 out:
427
428 #if CONFIG_MACF
429 if (labelstr)
430 FREE(labelstr, M_MACTEMP);
431 #endif /* CONFIG_MACF */
432
433 if (vp) {
434 vnode_put(vp);
435 }
436 if (pvp) {
437 vnode_put(pvp);
438 }
439 if (need_nameidone) {
440 nameidone(&nd);
441 }
442
443 return (error);
444 }
445
446 /*
447 * common mount implementation (final stage of mounting)
448 *
449 * Arguments:
450 * fstypename file system type (i.e., its vfs name)
451 * pvp parent of covered vnode
452 * vp covered vnode
453 * cnp component name (i.e., path) of covered vnode
454 * flags generic mount flags
455 * fsmountargs file system specific data
456 * labelstr optional MAC label
457 * kernelmount TRUE for mounts initiated from inside the kernel
458 * ctx caller's context
459 */
460 static int
461 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
462 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
463 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
464 {
465 #if !CONFIG_MACF
466 #pragma unused(labelstr)
467 #endif
468 struct vnode *devvp = NULLVP;
469 struct vnode *device_vnode = NULLVP;
470 #if CONFIG_MACF
471 struct vnode *rvp;
472 #endif
473 struct mount *mp;
474 struct vfstable *vfsp = (struct vfstable *)0;
475 struct proc *p = vfs_context_proc(ctx);
476 int error, flag = 0;
477 user_addr_t devpath = USER_ADDR_NULL;
478 int ronly = 0;
479 int mntalloc = 0;
480 boolean_t vfsp_ref = FALSE;
481 boolean_t is_rwlock_locked = FALSE;
482 boolean_t did_rele = FALSE;
483 boolean_t have_usecount = FALSE;
484
485 /*
486 * Process an update for an existing mount
487 */
488 if (flags & MNT_UPDATE) {
489 if ((vp->v_flag & VROOT) == 0) {
490 error = EINVAL;
491 goto out1;
492 }
493 mp = vp->v_mount;
494
495 /* unmount in progress; return error */
496 mount_lock_spin(mp);
497 if (mp->mnt_lflag & MNT_LUNMOUNT) {
498 mount_unlock(mp);
499 error = EBUSY;
500 goto out1;
501 }
502 mount_unlock(mp);
503 lck_rw_lock_exclusive(&mp->mnt_rwlock);
504 is_rwlock_locked = TRUE;
505 /*
506 * We only allow the filesystem to be reloaded if it
507 * is currently mounted read-only.
508 */
509 if ((flags & MNT_RELOAD) &&
510 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
511 error = ENOTSUP;
512 goto out1;
513 }
514
515 /*
516 * If content protection is enabled, update mounts are not
517 * allowed to turn it off.
518 */
519 if ((mp->mnt_flag & MNT_CPROTECT) &&
520 ((flags & MNT_CPROTECT) == 0)) {
521 error = EINVAL;
522 goto out1;
523 }
524
525 #ifdef CONFIG_IMGSRC_ACCESS
526 /* Can't downgrade the backer of the root FS */
527 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
528 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
529 error = ENOTSUP;
530 goto out1;
531 }
532 #endif /* CONFIG_IMGSRC_ACCESS */
533
534 /*
535 * Only root, or the user that did the original mount is
536 * permitted to update it.
537 */
538 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
539 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
540 goto out1;
541 }
542 #if CONFIG_MACF
543 error = mac_mount_check_remount(ctx, mp);
544 if (error != 0) {
545 goto out1;
546 }
547 #endif
548 /*
549 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
550 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
551 */
552 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
553 flags |= MNT_NOSUID | MNT_NODEV;
554 if (mp->mnt_flag & MNT_NOEXEC)
555 flags |= MNT_NOEXEC;
556 }
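/*
 * Remember the current mount flags so they can be restored if the
 * update attempt fails (see the MNT_UPDATE handling after VFS_MOUNT()
 * below).
 */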
557 flag = mp->mnt_flag;
558
559
560
561 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
562
563 vfsp = mp->mnt_vtable;
564 goto update;
565 }
566 /*
567 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
568 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
569 */
570 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
571 flags |= MNT_NOSUID | MNT_NODEV;
572 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
573 flags |= MNT_NOEXEC;
574 }
575
576 /* XXXAUDIT: Should we capture the type on the error path as well? */
577 AUDIT_ARG(text, fstypename);
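/*
 * Find the vfstable entry for the requested filesystem type and take a
 * reference on it so it stays valid while we mount.
 */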
578 mount_list_lock();
579 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
580 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
581 vfsp->vfc_refcount++;
582 vfsp_ref = TRUE;
583 break;
584 }
585 mount_list_unlock();
586 if (vfsp == NULL) {
587 error = ENODEV;
588 goto out1;
589 }
590
591 /*
592 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
593 */
594 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
595 error = EINVAL; /* unsupported request */
596 goto out1;
597 }
598
599 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
600 if (error != 0) {
601 goto out1;
602 }
603
604 /*
605 * Allocate and initialize the filesystem (mount_t)
606 */
607 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
608 M_MOUNT, M_WAITOK);
609 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
610 mntalloc = 1;
611
612 /* Initialize the default IO constraints */
613 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
614 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
615 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
616 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
617 mp->mnt_devblocksize = DEV_BSIZE;
618 mp->mnt_alignmentmask = PAGE_MASK;
619 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
620 mp->mnt_ioscale = 1;
621 mp->mnt_ioflags = 0;
622 mp->mnt_realrootvp = NULLVP;
623 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
624
625 TAILQ_INIT(&mp->mnt_vnodelist);
626 TAILQ_INIT(&mp->mnt_workerqueue);
627 TAILQ_INIT(&mp->mnt_newvnodes);
628 mount_lock_init(mp);
629 lck_rw_lock_exclusive(&mp->mnt_rwlock);
630 is_rwlock_locked = TRUE;
631 mp->mnt_op = vfsp->vfc_vfsops;
632 mp->mnt_vtable = vfsp;
633 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
634 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
635 strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
636 strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
637 mp->mnt_vnodecovered = vp;
638 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
639 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
640 mp->mnt_devbsdunit = 0;
641
642 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
643 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
644
645 #if NFSCLIENT
646 if (kernelmount)
647 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
648 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
649 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
650 #endif /* NFSCLIENT */
651
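/*
 * Both fresh mounts and MNT_UPDATE re-mounts continue from here.
 */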
652 update:
653 /*
654 * Set the mount level flags.
655 */
656 if (flags & MNT_RDONLY)
657 mp->mnt_flag |= MNT_RDONLY;
658 else if (mp->mnt_flag & MNT_RDONLY) {
659 // disallow read/write upgrades of file systems that
660 // had the TYPENAME_OVERRIDE feature set.
661 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
662 error = EPERM;
663 goto out1;
664 }
665 mp->mnt_kern_flag |= MNTK_WANTRDWR;
666 }
667 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
668 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
669 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
670 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
671 MNT_QUARANTINE | MNT_CPROTECT);
672 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
673 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
674 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
675 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
676 MNT_QUARANTINE | MNT_CPROTECT);
677
678 #if CONFIG_MACF
679 if (flags & MNT_MULTILABEL) {
680 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
681 error = EINVAL;
682 goto out1;
683 }
684 mp->mnt_flag |= MNT_MULTILABEL;
685 }
686 #endif
687 /*
688 * Process device path for local file systems if requested
689 */
690 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
691 if (vfs_context_is64bit(ctx)) {
692 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
693 goto out1;
694 fsmountargs += sizeof(devpath);
695 } else {
696 user32_addr_t tmp;
697 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
698 goto out1;
699 /* munge into LP64 addr */
700 devpath = CAST_USER_ADDR_T(tmp);
701 fsmountargs += sizeof(tmp);
702 }
703
704 /* Lookup device and authorize access to it */
705 if ((devpath)) {
706 struct nameidata nd;
707
708 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
709 if ( (error = namei(&nd)) )
710 goto out1;
711
712 strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
713 devvp = nd.ni_vp;
714
715 nameidone(&nd);
716
717 if (devvp->v_type != VBLK) {
718 error = ENOTBLK;
719 goto out2;
720 }
721 if (major(devvp->v_rdev) >= nblkdev) {
722 error = ENXIO;
723 goto out2;
724 }
725 /*
726 * If mount by non-root, then verify that user has necessary
727 * permissions on the device.
728 */
729 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
730 mode_t accessmode = KAUTH_VNODE_READ_DATA;
731
732 if ((mp->mnt_flag & MNT_RDONLY) == 0)
733 accessmode |= KAUTH_VNODE_WRITE_DATA;
734 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
735 goto out2;
736 }
737 }
738 /* On first mount, preflight and open device */
739 if (devpath && ((flags & MNT_UPDATE) == 0)) {
740 if ( (error = vnode_ref(devvp)) )
741 goto out2;
742 /*
743 * Disallow multiple mounts of the same device.
744 * Disallow mounting of a device that is currently in use
745 * (except for root, which might share swap device for miniroot).
746 * Flush out any old buffers remaining from a previous use.
747 */
748 if ( (error = vfs_mountedon(devvp)) )
749 goto out3;
750
751 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
752 error = EBUSY;
753 goto out3;
754 }
755 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
756 error = ENOTBLK;
757 goto out3;
758 }
759 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
760 goto out3;
761
762 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
763 #if CONFIG_MACF
764 error = mac_vnode_check_open(ctx,
765 devvp,
766 ronly ? FREAD : FREAD|FWRITE);
767 if (error)
768 goto out3;
769 #endif /* MAC */
770 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
771 goto out3;
772
773 mp->mnt_devvp = devvp;
774 device_vnode = devvp;
775
776 } else if ((mp->mnt_flag & MNT_RDONLY) &&
777 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
778 (device_vnode = mp->mnt_devvp)) {
779 dev_t dev;
780 int maj;
781 /*
782 * If upgrade to read-write by non-root, then verify
783 * that user has necessary permissions on the device.
784 */
785 vnode_getalways(device_vnode);
786
787 if (suser(vfs_context_ucred(ctx), NULL) &&
788 (error = vnode_authorize(device_vnode, NULL,
789 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
790 ctx)) != 0) {
791 vnode_put(device_vnode);
792 goto out2;
793 }
794
795 /* Tell the device that we're upgrading */
796 dev = (dev_t)device_vnode->v_rdev;
797 maj = major(dev);
798
799 if ((u_int)maj >= (u_int)nblkdev)
800 panic("Volume mounted on a device with invalid major number.");
801
802 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
803 vnode_put(device_vnode);
804 device_vnode = NULLVP;
805 if (error != 0) {
806 goto out2;
807 }
808 }
809 }
810 #if CONFIG_MACF
811 if ((flags & MNT_UPDATE) == 0) {
812 mac_mount_label_init(mp);
813 mac_mount_label_associate(ctx, mp);
814 }
815 if (labelstr) {
816 if ((flags & MNT_UPDATE) != 0) {
817 error = mac_mount_check_label_update(ctx, mp);
818 if (error != 0)
819 goto out3;
820 }
821 }
822 #endif
823 /*
824 * Mount the filesystem.
825 */
826 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
827
828 if (flags & MNT_UPDATE) {
829 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
830 mp->mnt_flag &= ~MNT_RDONLY;
831 mp->mnt_flag &=~
832 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
833 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
834 if (error)
835 mp->mnt_flag = flag; /* restore flag value */
836 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
837 lck_rw_done(&mp->mnt_rwlock);
838 is_rwlock_locked = FALSE;
839 if (!error)
840 enablequotas(mp, ctx);
841 goto exit;
842 }
843
844 /*
845 * Put the new filesystem on the mount list after root.
846 */
847 if (error == 0) {
848 struct vfs_attr vfsattr;
849 #if CONFIG_MACF
850 if (vfs_flags(mp) & MNT_MULTILABEL) {
851 error = VFS_ROOT(mp, &rvp, ctx);
852 if (error) {
853 printf("%s() VFS_ROOT returned %d\n", __func__, error);
854 goto out3;
855 }
856 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
857 /*
858 * drop reference provided by VFS_ROOT
859 */
860 vnode_put(rvp);
861
862 if (error)
863 goto out3;
864 }
865 #endif /* MAC */
866
867 vnode_lock_spin(vp);
868 CLR(vp->v_flag, VMOUNT);
869 vp->v_mountedhere = mp;
870 vnode_unlock(vp);
871
872 /*
873 * taking the name_cache_lock exclusively will
874 * ensure that everyone is out of the fast path who
875 * might be trying to use a now-stale copy of
876 * vp->v_mountedhere->mnt_realrootvp;
877 * bumping mount_generation causes the cached values
878 * to be invalidated
879 */
880 name_cache_lock();
881 mount_generation++;
882 name_cache_unlock();
883
884 error = vnode_ref(vp);
885 if (error != 0) {
886 goto out4;
887 }
888
889 have_usecount = TRUE;
890
891 error = checkdirs(vp, ctx);
892 if (error != 0) {
893 /* Unmount the filesystem as cdir/rdirs cannot be updated */
894 goto out4;
895 }
896 /*
897 * there is no cleanup code here, so the return value is cast to void;
898 * this needs to be revisited
899 */
900 (void)VFS_START(mp, 0, ctx);
901
902 if (mount_list_add(mp) != 0) {
903 /*
904 * The system is shutting down, trying to unmount
905 * everything, so fail with a plausible errno.
906 */
907 error = EBUSY;
908 goto out4;
909 }
910 lck_rw_done(&mp->mnt_rwlock);
911 is_rwlock_locked = FALSE;
912
913 /* Check if this mounted file system supports EAs or named streams. */
914 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
915 VFSATTR_INIT(&vfsattr);
916 VFSATTR_WANTED(&vfsattr, f_capabilities);
917 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
918 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
919 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
920 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
921 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
922 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
923 }
924 #if NAMEDSTREAMS
925 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
926 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
927 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
928 }
929 #endif
930 /* Check if this file system supports path from id lookups. */
931 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
932 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
933 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
934 } else if (mp->mnt_flag & MNT_DOVOLFS) {
935 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
936 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
937 }
938 }
939 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
940 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
941 }
942 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
943 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
944 }
945 /* increment the operations count */
946 OSAddAtomic(1, &vfs_nummntops);
947 enablequotas(mp, ctx);
948
949 if (device_vnode) {
950 device_vnode->v_specflags |= SI_MOUNTEDON;
951
952 /*
953 * cache the IO attributes for the underlying physical media...
954 * an error return indicates the underlying driver doesn't
955 * support all the queries necessary... however, reasonable
956 * defaults will have been set, so no reason to bail or care
957 */
958 vfs_init_io_attributes(device_vnode, mp);
959 }
960
961 /* Now that mount is setup, notify the listeners */
962 vfs_notify_mount(pvp);
963 } else {
964 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
965 if (mp->mnt_vnodelist.tqh_first != NULL) {
966 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
967 mp->mnt_vtable->vfc_name, error);
968 }
969
970 vnode_lock_spin(vp);
971 CLR(vp->v_flag, VMOUNT);
972 vnode_unlock(vp);
973 mount_list_lock();
974 mp->mnt_vtable->vfc_refcount--;
975 mount_list_unlock();
976
977 if (device_vnode ) {
978 vnode_rele(device_vnode);
979 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
980 }
981 lck_rw_done(&mp->mnt_rwlock);
982 is_rwlock_locked = FALSE;
983
984 /*
985 * if we get here, we have a mount structure that needs to be freed,
986 * but since the coveredvp hasn't yet been updated to point at it,
987 * no need to worry about other threads holding a crossref on this mp
988 * so it's ok to just free it
989 */
990 mount_lock_destroy(mp);
991 #if CONFIG_MACF
992 mac_mount_label_destroy(mp);
993 #endif
994 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
995 }
996 exit:
997 /*
998 * drop I/O count on the device vp if there was one
999 */
1000 if (devpath && devvp)
1001 vnode_put(devvp);
1002
1003 return(error);
1004
1005 /* Error condition exits */
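/*
 * out4: undo placement on the covered vnode and force-unmount the
 *       partially constructed mount
 * out3: drop the usecount taken on the device vnode for a fresh mount
 * out2: drop the iocount on the device vnode
 * out1: release mnt_rwlock if held, free the mount structure if it was
 *       allocated here, and drop the vfstable reference
 */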
1006 out4:
1007 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1008
1009 /*
1010 * If the mount has been placed on the covered vp,
1011 * it may have been discovered by now, so we have
1012 * to treat this just like an unmount
1013 */
1014 mount_lock_spin(mp);
1015 mp->mnt_lflag |= MNT_LDEAD;
1016 mount_unlock(mp);
1017
1018 if (device_vnode != NULLVP) {
1019 vnode_rele(device_vnode);
1020 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1021 ctx);
1022 did_rele = TRUE;
1023 }
1024
1025 vnode_lock_spin(vp);
1026
1027 mp->mnt_crossref++;
1028 vp->v_mountedhere = (mount_t) 0;
1029
1030 vnode_unlock(vp);
1031
1032 if (have_usecount) {
1033 vnode_rele(vp);
1034 }
1035 out3:
1036 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1037 vnode_rele(devvp);
1038 out2:
1039 if (devpath && devvp)
1040 vnode_put(devvp);
1041 out1:
1042 /* Release mnt_rwlock only when it was taken */
1043 if (is_rwlock_locked == TRUE) {
1044 lck_rw_done(&mp->mnt_rwlock);
1045 }
1046
1047 if (mntalloc) {
1048 if (mp->mnt_crossref)
1049 mount_dropcrossref(mp, vp, 0);
1050 else {
1051 mount_lock_destroy(mp);
1052 #if CONFIG_MACF
1053 mac_mount_label_destroy(mp);
1054 #endif
1055 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1056 }
1057 }
1058 if (vfsp_ref) {
1059 mount_list_lock();
1060 vfsp->vfc_refcount--;
1061 mount_list_unlock();
1062 }
1063
1064 return(error);
1065 }
1066
1067 /*
1068 * Flush in-core data, check for competing mount attempts,
1069 * and set VMOUNT
1070 */
1071 int
1072 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1073 {
1074 #if !CONFIG_MACF
1075 #pragma unused(cnp,fsname)
1076 #endif
1077 struct vnode_attr va;
1078 int error;
1079
1080 if (!skip_auth) {
1081 /*
1082 * If the user is not root, ensure that they own the directory
1083 * onto which we are attempting to mount.
1084 */
1085 VATTR_INIT(&va);
1086 VATTR_WANTED(&va, va_uid);
1087 if ((error = vnode_getattr(vp, &va, ctx)) ||
1088 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1089 (!vfs_context_issuser(ctx)))) {
1090 error = EPERM;
1091 goto out;
1092 }
1093 }
1094
1095 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1096 goto out;
1097
1098 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1099 goto out;
1100
1101 if (vp->v_type != VDIR) {
1102 error = ENOTDIR;
1103 goto out;
1104 }
1105
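/* the vnode is already involved in another mount; refuse with EBUSY */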
1106 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1107 error = EBUSY;
1108 goto out;
1109 }
1110
1111 #if CONFIG_MACF
1112 error = mac_mount_check_mount(ctx, vp,
1113 cnp, fsname);
1114 if (error != 0)
1115 goto out;
1116 #endif
1117
1118 vnode_lock_spin(vp);
1119 SET(vp->v_flag, VMOUNT);
1120 vnode_unlock(vp);
1121
1122 out:
1123 return error;
1124 }
1125
1126 #if CONFIG_IMGSRC_ACCESS
1127
1128 #if DEBUG
1129 #define IMGSRC_DEBUG(args...) printf(args)
1130 #else
1131 #define IMGSRC_DEBUG(args...) do { } while(0)
1132 #endif
1133
1134 static int
1135 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1136 {
1137 struct nameidata nd;
1138 vnode_t vp, realdevvp;
1139 mode_t accessmode;
1140 int error;
1141
1142 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1143 if ( (error = namei(&nd)) ) {
1144 IMGSRC_DEBUG("namei() failed with %d\n", error);
1145 return error;
1146 }
1147
1148 vp = nd.ni_vp;
1149
1150 if (!vnode_isblk(vp)) {
1151 IMGSRC_DEBUG("Not block device.\n");
1152 error = ENOTBLK;
1153 goto out;
1154 }
1155
1156 realdevvp = mp->mnt_devvp;
1157 if (realdevvp == NULLVP) {
1158 IMGSRC_DEBUG("No device backs the mount.\n");
1159 error = ENXIO;
1160 goto out;
1161 }
1162
1163 error = vnode_getwithref(realdevvp);
1164 if (error != 0) {
1165 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1166 goto out;
1167 }
1168
1169 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1170 IMGSRC_DEBUG("Wrong dev_t.\n");
1171 error = ENXIO;
1172 goto out1;
1173 }
1174
1175 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1176
1177 /*
1178 * If mount by non-root, then verify that user has necessary
1179 * permissions on the device.
1180 */
1181 if (!vfs_context_issuser(ctx)) {
1182 accessmode = KAUTH_VNODE_READ_DATA;
1183 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1184 accessmode |= KAUTH_VNODE_WRITE_DATA;
1185 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1186 IMGSRC_DEBUG("Access denied.\n");
1187 goto out1;
1188 }
1189 }
1190
1191 *devvpp = vp;
1192
1193 out1:
1194 vnode_put(realdevvp);
1195 out:
1196 nameidone(&nd);
1197 if (error) {
1198 vnode_put(vp);
1199 }
1200
1201 return error;
1202 }
1203
1204 /*
1205 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1206 * and call checkdirs()
1207 */
1208 static int
1209 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1210 {
1211 int error;
1212
1213 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1214
1215 vnode_lock_spin(vp);
1216 CLR(vp->v_flag, VMOUNT);
1217 vp->v_mountedhere = mp;
1218 vnode_unlock(vp);
1219
1220 /*
1221 * taking the name_cache_lock exclusively will
1222 * ensure that everyone is out of the fast path who
1223 * might be trying to use a now-stale copy of
1224 * vp->v_mountedhere->mnt_realrootvp;
1225 * bumping mount_generation causes the cached values
1226 * to be invalidated
1227 */
1228 name_cache_lock();
1229 mount_generation++;
1230 name_cache_unlock();
1231
1232 error = vnode_ref(vp);
1233 if (error != 0) {
1234 goto out;
1235 }
1236
1237 error = checkdirs(vp, ctx);
1238 if (error != 0) {
1239 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1240 vnode_rele(vp);
1241 goto out;
1242 }
1243
1244 out:
1245 if (error != 0) {
1246 mp->mnt_vnodecovered = NULLVP;
1247 }
1248 return error;
1249 }
1250
1251 static void
1252 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1253 {
1254 vnode_rele(vp);
1255 vnode_lock_spin(vp);
1256 vp->v_mountedhere = (mount_t)NULL;
1257 vnode_unlock(vp);
1258
1259 mp->mnt_vnodecovered = NULLVP;
1260 }
1261
1262 static int
1263 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1264 {
1265 int error;
1266
1267 /* unmount in progress; return error */
1268 mount_lock_spin(mp);
1269 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1270 mount_unlock(mp);
1271 return EBUSY;
1272 }
1273 mount_unlock(mp);
1274 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1275
1276 /*
1277 * We only allow the filesystem to be reloaded if it
1278 * is currently mounted read-only.
1279 */
1280 if ((flags & MNT_RELOAD) &&
1281 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1282 error = ENOTSUP;
1283 goto out;
1284 }
1285
1286 /*
1287 * Only root, or the user that did the original mount is
1288 * permitted to update it.
1289 */
1290 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1291 (!vfs_context_issuser(ctx))) {
1292 error = EPERM;
1293 goto out;
1294 }
1295 #if CONFIG_MACF
1296 error = mac_mount_check_remount(ctx, mp);
1297 if (error != 0) {
1298 goto out;
1299 }
1300 #endif
1301
1302 out:
1303 if (error) {
1304 lck_rw_done(&mp->mnt_rwlock);
1305 }
1306
1307 return error;
1308 }
1309
1310 static void
1311 mount_end_update(mount_t mp)
1312 {
1313 lck_rw_done(&mp->mnt_rwlock);
1314 }
1315
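/*
 * Return, with an iocount held, the imageboot source root vnode recorded
 * at the given nesting height, if there is one.
 */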
1316 static int
1317 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1318 {
1319 vnode_t vp;
1320
1321 if (height >= MAX_IMAGEBOOT_NESTING) {
1322 return EINVAL;
1323 }
1324
1325 vp = imgsrc_rootvnodes[height];
1326 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1327 *rvpp = vp;
1328 return 0;
1329 } else {
1330 return ENOENT;
1331 }
1332 }
1333
1334 static int
1335 relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1336 const char *fsname, vfs_context_t ctx,
1337 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1338 {
1339 int error;
1340 mount_t mp;
1341 boolean_t placed = FALSE;
1342 vnode_t devvp = NULLVP;
1343 struct vfstable *vfsp;
1344 user_addr_t devpath;
1345 char *old_mntonname;
1346 vnode_t rvp;
1347 uint32_t height;
1348 uint32_t flags;
1349
1350 /* If we didn't imageboot, nothing to move */
1351 if (imgsrc_rootvnodes[0] == NULLVP) {
1352 return EINVAL;
1353 }
1354
1355 /* Only root can do this */
1356 if (!vfs_context_issuser(ctx)) {
1357 return EPERM;
1358 }
1359
1360 IMGSRC_DEBUG("looking for root vnode.\n");
1361
1362 /*
1363 * Get root vnode of filesystem we're moving.
1364 */
1365 if (by_index) {
1366 if (is64bit) {
1367 struct user64_mnt_imgsrc_args mia64;
1368 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1369 if (error != 0) {
1370 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1371 return error;
1372 }
1373
1374 height = mia64.mi_height;
1375 flags = mia64.mi_flags;
1376 devpath = mia64.mi_devpath;
1377 } else {
1378 struct user32_mnt_imgsrc_args mia32;
1379 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1380 if (error != 0) {
1381 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1382 return error;
1383 }
1384
1385 height = mia32.mi_height;
1386 flags = mia32.mi_flags;
1387 devpath = mia32.mi_devpath;
1388 }
1389 } else {
1390 /*
1391 * For binary compatibility--assumes one level of nesting.
1392 */
1393 if (is64bit) {
1394 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1395 return error;
1396 } else {
1397 user32_addr_t tmp;
1398 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1399 return error;
1400
1401 /* munge into LP64 addr */
1402 devpath = CAST_USER_ADDR_T(tmp);
1403 }
1404
1405 height = 0;
1406 flags = 0;
1407 }
1408
1409 if (flags != 0) {
1410 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1411 return EINVAL;
1412 }
1413
1414 error = get_imgsrc_rootvnode(height, &rvp);
1415 if (error != 0) {
1416 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1417 return error;
1418 }
1419
1420 IMGSRC_DEBUG("got root vnode.\n");
1421
1422 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1423
1424 /* Can only move once */
1425 mp = vnode_mount(rvp);
1426 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1427 IMGSRC_DEBUG("Already moved.\n");
1428 error = EBUSY;
1429 goto out0;
1430 }
1431
1432 IMGSRC_DEBUG("Starting updated.\n");
1433
1434 /* Get exclusive rwlock on mount, authorize update on mp */
1435 error = mount_begin_update(mp , ctx, 0);
1436 if (error != 0) {
1437 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
1438 goto out0;
1439 }
1440
1441 /*
1442 * It can only be moved once. Flag is set under the rwlock,
1443 * so we're now safe to proceed.
1444 */
1445 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1446 IMGSRC_DEBUG("Already moved [2]\n");
1447 goto out1;
1448 }
1449
1450
1451 IMGSRC_DEBUG("Preparing coveredvp.\n");
1452
1453 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1454 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1455 if (error != 0) {
1456 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1457 goto out1;
1458 }
1459
1460 IMGSRC_DEBUG("Covered vp OK.\n");
1461
1462 /* Sanity check the name the caller has provided */
1463 vfsp = mp->mnt_vtable;
1464 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1465 IMGSRC_DEBUG("Wrong fs name.\n");
1466 error = EINVAL;
1467 goto out2;
1468 }
1469
1470 /* Check the device vnode and update mount-from name, for local filesystems */
1471 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1472 IMGSRC_DEBUG("Local, doing device validation.\n");
1473
1474 if (devpath != USER_ADDR_NULL) {
1475 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1476 if (error) {
1477 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1478 goto out2;
1479 }
1480
1481 vnode_put(devvp);
1482 }
1483 }
1484
1485 /*
1486 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1487 * and increment the name cache's mount generation
1488 */
1489
1490 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1491 error = place_mount_and_checkdirs(mp, vp, ctx);
1492 if (error != 0) {
1493 goto out2;
1494 }
1495
1496 placed = TRUE;
1497
1498 strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1499 strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1500
1501 /* Forbid future moves */
1502 mount_lock(mp);
1503 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1504 mount_unlock(mp);
1505
1506 /* Finally, add to mount list, completely ready to go */
1507 if (mount_list_add(mp) != 0) {
1508 /*
1509 * The system is shutting down, trying to unmount
1510 * everything, so fail with a plausible errno.
1511 */
1512 error = EBUSY;
1513 goto out3;
1514 }
1515
1516 mount_end_update(mp);
1517 vnode_put(rvp);
1518 FREE(old_mntonname, M_TEMP);
1519
1520 vfs_notify_mount(pvp);
1521
1522 return 0;
1523 out3:
1524 strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1525
1526 mount_lock(mp);
1527 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1528 mount_unlock(mp);
1529
1530 out2:
1531 /*
1532 * Placing the mp on the vnode clears VMOUNT,
1533 * so cleanup is different after that point
1534 */
1535 if (placed) {
1536 /* Rele the vp, clear VMOUNT and v_mountedhere */
1537 undo_place_on_covered_vp(mp, vp);
1538 } else {
1539 vnode_lock_spin(vp);
1540 CLR(vp->v_flag, VMOUNT);
1541 vnode_unlock(vp);
1542 }
1543 out1:
1544 mount_end_update(mp);
1545
1546 out0:
1547 vnode_put(rvp);
1548 FREE(old_mntonname, M_TEMP);
1549 return error;
1550 }
1551
1552 #endif /* CONFIG_IMGSRC_ACCESS */
1553
1554 void
1555 enablequotas(struct mount *mp, vfs_context_t ctx)
1556 {
1557 struct nameidata qnd;
1558 int type;
1559 char qfpath[MAXPATHLEN];
1560 const char *qfname = QUOTAFILENAME;
1561 const char *qfopsname = QUOTAOPSNAME;
1562 const char *qfextension[] = INITQFNAMES;
1563
1564 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1565 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1566 return;
1567 }
1568 /*
1569 * Enable filesystem disk quotas if necessary.
1570 * Errors are ignored, as they should not interfere with the final mount.
1571 */
1572 for (type=0; type < MAXQUOTAS; type++) {
1573 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1574 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1575 CAST_USER_ADDR_T(qfpath), ctx);
1576 if (namei(&qnd) != 0)
1577 continue; /* option file to trigger quotas is not present */
1578 vnode_put(qnd.ni_vp);
1579 nameidone(&qnd);
1580 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1581
1582 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1583 }
1584 return;
1585 }
1586
1587
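/*
 * Per-process callback for checkdirs(): if this process's current or
 * root directory is the vnode that was just covered, replace it with
 * the root of the newly mounted filesystem.
 */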
1588 static int
1589 checkdirs_callback(proc_t p, void * arg)
1590 {
1591 struct cdirargs * cdrp = (struct cdirargs * )arg;
1592 vnode_t olddp = cdrp->olddp;
1593 vnode_t newdp = cdrp->newdp;
1594 struct filedesc *fdp;
1595 vnode_t tvp;
1596 vnode_t fdp_cvp;
1597 vnode_t fdp_rvp;
1598 int cdir_changed = 0;
1599 int rdir_changed = 0;
1600
1601 /*
1602 * XXX Also needs to iterate each thread in the process to see if it
1603 * XXX is using a per-thread current working directory, and, if so,
1604 * XXX update that as well.
1605 */
1606
1607 proc_fdlock(p);
1608 fdp = p->p_fd;
1609 if (fdp == (struct filedesc *)0) {
1610 proc_fdunlock(p);
1611 return(PROC_RETURNED);
1612 }
1613 fdp_cvp = fdp->fd_cdir;
1614 fdp_rvp = fdp->fd_rdir;
1615 proc_fdunlock(p);
1616
1617 if (fdp_cvp == olddp) {
1618 vnode_ref(newdp);
1619 tvp = fdp->fd_cdir;
1620 fdp_cvp = newdp;
1621 cdir_changed = 1;
1622 vnode_rele(tvp);
1623 }
1624 if (fdp_rvp == olddp) {
1625 vnode_ref(newdp);
1626 tvp = fdp->fd_rdir;
1627 fdp_rvp = newdp;
1628 rdir_changed = 1;
1629 vnode_rele(tvp);
1630 }
1631 if (cdir_changed || rdir_changed) {
1632 proc_fdlock(p);
1633 fdp->fd_cdir = fdp_cvp;
1634 fdp->fd_rdir = fdp_rvp;
1635 proc_fdunlock(p);
1636 }
1637 return(PROC_RETURNED);
1638 }
1639
1640
1641
1642 /*
1643 * Scan all active processes to see if any of them have a current
1644 * or root directory onto which the new filesystem has just been
1645 * mounted. If so, replace them with the new mount point.
1646 */
1647 static int
1648 checkdirs(vnode_t olddp, vfs_context_t ctx)
1649 {
1650 vnode_t newdp;
1651 vnode_t tvp;
1652 int err;
1653 struct cdirargs cdr;
1654 struct uthread * uth = get_bsdthread_info(current_thread());
1655
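/*
 * If the only usecount on the covered vnode is the one the mount itself
 * holds, no process can have it as a cwd or root directory, so there is
 * nothing to fix up.
 */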
1656 if (olddp->v_usecount == 1)
1657 return(0);
1658 if (uth != (struct uthread *)0)
1659 uth->uu_notrigger = 1;
1660 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1661 if (uth != (struct uthread *)0)
1662 uth->uu_notrigger = 0;
1663
1664 if (err != 0) {
1665 #if DIAGNOSTIC
1666 panic("mount: lost mount: error %d", err);
1667 #endif
1668 return(err);
1669 }
1670
1671 cdr.olddp = olddp;
1672 cdr.newdp = newdp;
1673 /* do not block for exec/fork trans as the vnodes in cwd & rootdir are not changing */
1674 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1675
1676 if (rootvnode == olddp) {
1677 vnode_ref(newdp);
1678 tvp = rootvnode;
1679 rootvnode = newdp;
1680 vnode_rele(tvp);
1681 }
1682
1683 vnode_put(newdp);
1684 return(0);
1685 }
1686
1687 /*
1688 * Unmount a file system.
1689 *
1690 * Note: unmount takes a path to the vnode mounted on as argument,
1691 * not the special file (as it did historically).
1692 */
1693 /* ARGSUSED */
1694 int
1695 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1696 {
1697 vnode_t vp;
1698 struct mount *mp;
1699 int error;
1700 struct nameidata nd;
1701 vfs_context_t ctx = vfs_context_current();
1702
1703 NDINIT(&nd, LOOKUP, OP_UNMOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1704 UIO_USERSPACE, uap->path, ctx);
1705 error = namei(&nd);
1706 if (error)
1707 return (error);
1708 vp = nd.ni_vp;
1709 mp = vp->v_mount;
1710 nameidone(&nd);
1711
1712 #if CONFIG_MACF
1713 error = mac_mount_check_umount(ctx, mp);
1714 if (error != 0) {
1715 vnode_put(vp);
1716 return (error);
1717 }
1718 #endif
1719 /*
1720 * Must be the root of the filesystem
1721 */
1722 if ((vp->v_flag & VROOT) == 0) {
1723 vnode_put(vp);
1724 return (EINVAL);
1725 }
1726 mount_ref(mp, 0);
1727 vnode_put(vp);
1728 /* safedounmount consumes the mount ref */
1729 return (safedounmount(mp, uap->flags, ctx));
1730 }
1731
1732 int
1733 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1734 {
1735 mount_t mp;
1736
1737 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1738 if (mp == (mount_t)0) {
1739 return(ENOENT);
1740 }
1741 mount_ref(mp, 0);
1742 mount_iterdrop(mp);
1743 /* safedounmount consumes the mount ref */
1744 return(safedounmount(mp, flags, ctx));
1745 }
1746
1747
1748 /*
1749 * The mount struct comes with a mount ref which will be consumed.
1750 * Do the actual file system unmount and prevent some common foot-shooting.
1751 */
1752 int
1753 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1754 {
1755 int error;
1756 proc_t p = vfs_context_proc(ctx);
1757
1758 /*
1759 * If the file system is not responding, MNT_NOBLOCK is set,
1760 * and this is not a forced unmount, then return EBUSY.
1761 */
1762 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1763 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1764 error = EBUSY;
1765 goto out;
1766 }
1767
1768 /*
1769 * Skip authorization if the mount is tagged as permissive and
1770 * this is not a forced-unmount attempt.
1771 */
1772 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1773 /*
1774 * Only root, or the user that did the original mount is
1775 * permitted to unmount this filesystem.
1776 */
1777 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1778 (error = suser(kauth_cred_get(), &p->p_acflag)))
1779 goto out;
1780 }
1781 /*
1782 * Don't allow unmounting the root file system.
1783 */
1784 if (mp->mnt_flag & MNT_ROOTFS) {
1785 error = EBUSY; /* the root is always busy */
1786 goto out;
1787 }
1788
1789 #ifdef CONFIG_IMGSRC_ACCESS
1790 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1791 error = EBUSY;
1792 goto out;
1793 }
1794 #endif /* CONFIG_IMGSRC_ACCESS */
1795
1796 return (dounmount(mp, flags, 1, ctx));
1797
1798 out:
1799 mount_drop(mp, 0);
1800 return(error);
1801 }
1802
1803 /*
1804 * Do the actual file system unmount.
1805 */
1806 int
1807 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1808 {
1809 vnode_t coveredvp = (vnode_t)0;
1810 int error;
1811 int needwakeup = 0;
1812 int forcedunmount = 0;
1813 int lflags = 0;
1814 struct vnode *devvp = NULLVP;
1815 #if CONFIG_TRIGGERS
1816 proc_t p = vfs_context_proc(ctx);
1817 int did_vflush = 0;
1818 int pflags_save = 0;
1819 #endif /* CONFIG_TRIGGERS */
1820
1821 if (flags & MNT_FORCE)
1822 forcedunmount = 1;
1823
1824 mount_lock(mp);
1825 /* XXX post jaguar fix LK_DRAIN - then clean this up */
1826 if ((flags & MNT_FORCE)) {
1827 mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
1828 mp->mnt_lflag |= MNT_LFORCE;
1829 }
1830 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1831 mp->mnt_lflag |= MNT_LWAIT;
1832 if(withref != 0)
1833 mount_drop(mp, 1);
1834 msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
1835 /*
1836 * The prior unmount attempt has probably succeeded.
1837 * Do not dereference mp here - returning EBUSY is safest.
1838 */
1839 return (EBUSY);
1840 }
1841
1842 #if CONFIG_TRIGGERS
1843 if (flags & MNT_NOBLOCK && p != kernproc)
1844 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1845 #endif
1846
1847 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1848 mp->mnt_lflag |= MNT_LUNMOUNT;
1849 mp->mnt_flag &=~ MNT_ASYNC;
1850 /*
1851 * anyone currently in the fast path that
1852 * trips over the cached rootvp will be
1853 * dumped out and forced into the slow path
1854 * to regenerate a new cached value
1855 */
1856 mp->mnt_realrootvp = NULLVP;
1857 mount_unlock(mp);
1858
1859 /*
1860 * taking the name_cache_lock exclusively will
1861 * ensure that everyone is out of the fast path who
1862 * might be trying to use a now-stale copy of
1863 * vp->v_mountedhere->mnt_realrootvp;
1864 * bumping mount_generation causes the cached values
1865 * to be invalidated
1866 */
1867 name_cache_lock();
1868 mount_generation++;
1869 name_cache_unlock();
1870
1871
1872 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1873 if (withref != 0)
1874 mount_drop(mp, 0);
1875 #if CONFIG_FSE
1876 fsevent_unmount(mp); /* has to come first! */
1877 #endif
1878 error = 0;
1879 if (forcedunmount == 0) {
1880 ubc_umount(mp); /* release cached vnodes */
1881 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1882 error = VFS_SYNC(mp, MNT_WAIT, ctx);
1883 if (error) {
1884 mount_lock(mp);
1885 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1886 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1887 mp->mnt_lflag &= ~MNT_LFORCE;
1888 goto out;
1889 }
1890 }
1891 }
1892
1893 #if CONFIG_TRIGGERS
1894 vfs_nested_trigger_unmounts(mp, flags, ctx);
1895 did_vflush = 1;
1896 #endif
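/*
 * Reclaim the vnodes on this mount (skipping swap, system, and root
 * vnodes); a forced unmount additionally closes out vnodes that are
 * still in use.
 */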
1897 if (forcedunmount)
1898 lflags |= FORCECLOSE;
1899 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1900 if ((forcedunmount == 0) && error) {
1901 mount_lock(mp);
1902 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1903 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1904 mp->mnt_lflag &= ~MNT_LFORCE;
1905 goto out;
1906 }
1907
1908 /* make sure no one is in the mount iterations or lookups */
1909 mount_iterdrain(mp);
1910
1911 error = VFS_UNMOUNT(mp, flags, ctx);
1912 if (error) {
1913 mount_iterreset(mp);
1914 mount_lock(mp);
1915 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1916 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1917 mp->mnt_lflag &= ~MNT_LFORCE;
1918 goto out;
1919 }
1920
1921 /* increment the operations count */
1922 if (!error)
1923 OSAddAtomic(1, &vfs_nummntops);
1924
1925 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1926 /* hold an io reference and drop the usecount before close */
1927 devvp = mp->mnt_devvp;
1928 vnode_getalways(devvp);
1929 vnode_rele(devvp);
1930 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1931 ctx);
1932 vnode_clearmountedon(devvp);
1933 vnode_put(devvp);
1934 }
1935 lck_rw_done(&mp->mnt_rwlock);
1936 mount_list_remove(mp);
1937 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1938
1939 /* mark the mount point hook in the vp but do not drop the ref yet */
1940 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1941 vnode_getwithref(coveredvp);
1942 vnode_lock_spin(coveredvp);
1943
1944 mp->mnt_crossref++;
1945 coveredvp->v_mountedhere = (struct mount *)0;
1946
1947 vnode_unlock(coveredvp);
1948 vnode_put(coveredvp);
1949 }
1950
1951 mount_list_lock();
1952 mp->mnt_vtable->vfc_refcount--;
1953 mount_list_unlock();
1954
1955 cache_purgevfs(mp); /* remove cache entries for this file sys */
1956 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1957 mount_lock(mp);
1958 mp->mnt_lflag |= MNT_LDEAD;
1959
1960 if (mp->mnt_lflag & MNT_LWAIT) {
1961 /*
1962 * do the wakeup here
1963 * in case we block in mount_refdrain
1964 * which will drop the mount lock
1965 * and allow anyone blocked in vfs_busy
1966 * to wakeup and see the LDEAD state
1967 */
1968 mp->mnt_lflag &= ~MNT_LWAIT;
1969 wakeup((caddr_t)mp);
1970 }
1971 mount_refdrain(mp);
1972 out:
1973 if (mp->mnt_lflag & MNT_LWAIT) {
1974 mp->mnt_lflag &= ~MNT_LWAIT;
1975 needwakeup = 1;
1976 }
1977
1978 #if CONFIG_TRIGGERS
1979 if (flags & MNT_NOBLOCK && p != kernproc) {
1980 // Restore P_NOREMOTEHANG bit to its previous value
1981 if ((pflags_save & P_NOREMOTEHANG) == 0)
1982 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
1983 }
1984
1985 /*
1986 * Callback and context are set together under the mount lock, and
1987 * never cleared, so we're safe to examine them here, drop the lock,
1988 * and call out.
1989 */
1990 if (mp->mnt_triggercallback != NULL) {
1991 mount_unlock(mp);
1992 if (error == 0) {
1993 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
1994 } else if (did_vflush) {
1995 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
1996 }
1997 } else {
1998 mount_unlock(mp);
1999 }
2000 #else
2001 mount_unlock(mp);
2002 #endif /* CONFIG_TRIGGERS */
2003
2004 lck_rw_done(&mp->mnt_rwlock);
2005
2006 if (needwakeup)
2007 wakeup((caddr_t)mp);
2008
2009 if (!error) {
2010 if ((coveredvp != NULLVP)) {
2011 vnode_t pvp;
2012
2013 vnode_getwithref(coveredvp);
2014 pvp = vnode_getparent(coveredvp);
2015 vnode_rele(coveredvp);
2016
2017 mount_dropcrossref(mp, coveredvp, 0);
2018 #if CONFIG_TRIGGERS
2019 if (coveredvp->v_resolve)
2020 vnode_trigger_rearm(coveredvp, ctx);
2021 #endif
2022 vnode_put(coveredvp);
2023
2024 if (pvp) {
2025 lock_vnode_and_post(pvp, NOTE_WRITE);
2026 vnode_put(pvp);
2027 }
2028 } else if (mp->mnt_flag & MNT_ROOTFS) {
2029 mount_lock_destroy(mp);
2030 #if CONFIG_MACF
2031 mac_mount_label_destroy(mp);
2032 #endif
2033 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2034 } else
2035 panic("dounmount: no coveredvp");
2036 }
2037 return (error);
2038 }
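/*
 * dounmount() above is the kernel side of the unmount(2) system call.
 * A minimal userspace sketch of how a forced unmount reaches this path
 * (the mount point path is illustrative):
 *
 *    #include <sys/param.h>
 *    #include <sys/mount.h>
 *    #include <stdio.h>
 *
 *    if (unmount("/Volumes/Example", MNT_FORCE) == -1)
 *        perror("unmount");
 */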
2039
2040 void
2041 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2042 {
2043 vnode_lock(dp);
2044 mp->mnt_crossref--;
2045
2046 if (mp->mnt_crossref < 0)
2047 panic("mount cross refs -ve");
2048
2049 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2050
2051 if (need_put)
2052 vnode_put_locked(dp);
2053 vnode_unlock(dp);
2054
2055 mount_lock_destroy(mp);
2056 #if CONFIG_MACF
2057 mac_mount_label_destroy(mp);
2058 #endif
2059 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2060 return;
2061 }
2062 if (need_put)
2063 vnode_put_locked(dp);
2064 vnode_unlock(dp);
2065 }
2066
2067
2068 /*
2069 * Sync each mounted filesystem.
2070 */
2071 #if DIAGNOSTIC
2072 int syncprt = 0;
2073 struct ctldebug debug0 = { "syncprt", &syncprt };
2074 #endif
2075
2076 int print_vmpage_stat=0;
2077
2078 static int
2079 sync_callback(mount_t mp, void * arg)
2080 {
2081 int asyncflag;
2082
2083 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2084 asyncflag = mp->mnt_flag & MNT_ASYNC;
2085 mp->mnt_flag &= ~MNT_ASYNC;
2086 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current());
2087 if (asyncflag)
2088 mp->mnt_flag |= MNT_ASYNC;
2089 }
2090 return(VFS_RETURNED);
2091 }
2092
2093
2094 /* ARGSUSED */
2095 int
2096 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2097 {
2098 vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
2099
2100 if(print_vmpage_stat) {
2101 vm_countdirtypages();
2102 }
2103
2104 #if DIAGNOSTIC
2105 if (syncprt)
2106 vfs_bufstats();
2107 #endif /* DIAGNOSTIC */
2108 return (0);
2109 }
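/*
 * Note that sync(2) passes a NULL arg to sync_callback() above, so each
 * writable filesystem is pushed with MNT_NOWAIT: the flush is scheduled
 * but not waited for.  A minimal userspace sketch:
 *
 *    #include <unistd.h>
 *
 *    sync();    // schedule a flush of dirty data on all writable mounts
 */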
2110
2111 /*
2112 * Change filesystem quotas.
2113 */
2114 #if QUOTA
2115 static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval);
2116
2117 int
2118 quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval)
2119 {
2120 boolean_t funnel_state;
2121 int error;
2122
2123 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2124 error = quotactl_funneled(p, uap, retval);
2125 thread_funnel_set(kernel_flock, funnel_state);
2126 return(error);
2127 }
2128
2129 static int
2130 quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2131 {
2132 struct mount *mp;
2133 int error, quota_cmd, quota_status;
2134 caddr_t datap;
2135 size_t fnamelen;
2136 struct nameidata nd;
2137 vfs_context_t ctx = vfs_context_current();
2138 struct dqblk my_dqblk;
2139
2140 AUDIT_ARG(uid, uap->uid);
2141 AUDIT_ARG(cmd, uap->cmd);
2142 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2143 uap->path, ctx);
2144 error = namei(&nd);
2145 if (error)
2146 return (error);
2147 mp = nd.ni_vp->v_mount;
2148 vnode_put(nd.ni_vp);
2149 nameidone(&nd);
2150
2151 /* copyin any data we will need for downstream code */
2152 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2153
2154 switch (quota_cmd) {
2155 case Q_QUOTAON:
2156 /* uap->arg specifies a file from which to take the quotas */
2157 fnamelen = MAXPATHLEN;
2158 datap = kalloc(MAXPATHLEN);
2159 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2160 break;
2161 case Q_GETQUOTA:
2162 /* uap->arg is a pointer to a dqblk structure. */
2163 datap = (caddr_t) &my_dqblk;
2164 break;
2165 case Q_SETQUOTA:
2166 case Q_SETUSE:
2167 /* uap->arg is a pointer to a dqblk structure. */
2168 datap = (caddr_t) &my_dqblk;
2169 if (proc_is64bit(p)) {
2170 struct user_dqblk my_dqblk64;
2171 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2172 if (error == 0) {
2173 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2174 }
2175 }
2176 else {
2177 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2178 }
2179 break;
2180 case Q_QUOTASTAT:
2181 /* uap->arg is a pointer to an integer */
2182 datap = (caddr_t) &quota_status;
2183 break;
2184 default:
2185 datap = NULL;
2186 break;
2187 } /* switch */
2188
2189 if (error == 0) {
2190 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2191 }
2192
2193 switch (quota_cmd) {
2194 case Q_QUOTAON:
2195 if (datap != NULL)
2196 kfree(datap, MAXPATHLEN);
2197 break;
2198 case Q_GETQUOTA:
2199 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2200 if (error == 0) {
2201 if (proc_is64bit(p)) {
2202 struct user_dqblk my_dqblk64;
2203 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2204 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2205 }
2206 else {
2207 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2208 }
2209 }
2210 break;
2211 case Q_QUOTASTAT:
2212 /* uap->arg is a pointer to an integer */
2213 if (error == 0) {
2214 error = copyout(datap, uap->arg, sizeof(quota_status));
2215 }
2216 break;
2217 default:
2218 break;
2219 } /* switch */
2220
2221 return (error);
2222 }
2223 #else
2224 int
2225 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2226 {
2227 return (EOPNOTSUPP);
2228 }
2229 #endif /* QUOTA */
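/*
 * The quota_cmd/SUBCMDSHIFT split above mirrors the userspace QCMD()
 * encoding from <sys/quota.h>.  A minimal sketch of querying the
 * current user's quota, assuming quotas are configured on the target
 * filesystem:
 *
 *    #include <sys/types.h>
 *    #include <sys/quota.h>
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    struct dqblk dq;
 *
 *    if (quotactl("/", QCMD(Q_GETQUOTA, USRQUOTA), getuid(), (caddr_t)&dq) == -1)
 *        perror("quotactl");
 */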
2230
2231 /*
2232 * Get filesystem statistics.
2233 *
2234 * Returns: 0 Success
2235 * namei:???
2236 * vfs_update_vfsstat:???
2237 * munge_statfs:EFAULT
2238 */
2239 /* ARGSUSED */
2240 int
2241 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2242 {
2243 struct mount *mp;
2244 struct vfsstatfs *sp;
2245 int error;
2246 struct nameidata nd;
2247 vfs_context_t ctx = vfs_context_current();
2248 vnode_t vp;
2249
2250 NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2251 UIO_USERSPACE, uap->path, ctx);
2252 error = namei(&nd);
2253 if (error)
2254 return (error);
2255 vp = nd.ni_vp;
2256 mp = vp->v_mount;
2257 sp = &mp->mnt_vfsstat;
2258 nameidone(&nd);
2259
2260 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2261 if (error != 0) {
2262 vnode_put(vp);
2263 return (error);
2264 }
2265
2266 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2267 vnode_put(vp);
2268 return (error);
2269 }
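/*
 * A minimal userspace sketch of statfs(2); the fields printed below are
 * members of the per-mount vfsstatfs that the copy-out above exposes:
 *
 *    #include <sys/param.h>
 *    #include <sys/mount.h>
 *    #include <stdio.h>
 *
 *    struct statfs sf;
 *
 *    if (statfs("/", &sf) == -1)
 *        perror("statfs");
 *    else
 *        printf("%s on %s: %llu of %llu blocks free\n",
 *            sf.f_mntfromname, sf.f_mntonname,
 *            (unsigned long long)sf.f_bfree,
 *            (unsigned long long)sf.f_blocks);
 */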
2270
2271 /*
2272 * Get filesystem statistics.
2273 */
2274 /* ARGSUSED */
2275 int
2276 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2277 {
2278 vnode_t vp;
2279 struct mount *mp;
2280 struct vfsstatfs *sp;
2281 int error;
2282
2283 AUDIT_ARG(fd, uap->fd);
2284
2285 if ( (error = file_vnode(uap->fd, &vp)) )
2286 return (error);
2287
2288 error = vnode_getwithref(vp);
2289 if (error) {
2290 file_drop(uap->fd);
2291 return (error);
2292 }
2293
2294 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2295
2296 mp = vp->v_mount;
2297 if (!mp) {
2298 error = EBADF;
2299 goto out;
2300 }
2301 sp = &mp->mnt_vfsstat;
2302 if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
2303 goto out;
2304 }
2305
2306 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2307
2308 out:
2309 file_drop(uap->fd);
2310 vnode_put(vp);
2311
2312 return (error);
2313 }
2314
2315 /*
2316 * Common routine to handle copying of statfs64 data to user space
2317 */
2318 static int
2319 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2320 {
2321 int error;
2322 struct statfs64 sfs;
2323
2324 bzero(&sfs, sizeof(sfs));
2325
2326 sfs.f_bsize = sfsp->f_bsize;
2327 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2328 sfs.f_blocks = sfsp->f_blocks;
2329 sfs.f_bfree = sfsp->f_bfree;
2330 sfs.f_bavail = sfsp->f_bavail;
2331 sfs.f_files = sfsp->f_files;
2332 sfs.f_ffree = sfsp->f_ffree;
2333 sfs.f_fsid = sfsp->f_fsid;
2334 sfs.f_owner = sfsp->f_owner;
2335 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2336 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2337 sfs.f_fssubtype = sfsp->f_fssubtype;
2338 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2339 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2340 } else {
2341 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2342 }
2343 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2344 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2345
2346 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2347
2348 return(error);
2349 }
2350
2351 /*
2352 * Get file system statistics in 64-bit mode
2353 */
2354 int
2355 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2356 {
2357 struct mount *mp;
2358 struct vfsstatfs *sp;
2359 int error;
2360 struct nameidata nd;
2361 vfs_context_t ctxp = vfs_context_current();
2362 vnode_t vp;
2363
2364 NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2365 UIO_USERSPACE, uap->path, ctxp);
2366 error = namei(&nd);
2367 if (error)
2368 return (error);
2369 vp = nd.ni_vp;
2370 mp = vp->v_mount;
2371 sp = &mp->mnt_vfsstat;
2372 nameidone(&nd);
2373
2374 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2375 if (error != 0) {
2376 vnode_put(vp);
2377 return (error);
2378 }
2379
2380 error = statfs64_common(mp, sp, uap->buf);
2381 vnode_put(vp);
2382
2383 return (error);
2384 }
2385
2386 /*
2387 * Get file system statistics in 64-bit mode
2388 */
2389 int
2390 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2391 {
2392 struct vnode *vp;
2393 struct mount *mp;
2394 struct vfsstatfs *sp;
2395 int error;
2396
2397 AUDIT_ARG(fd, uap->fd);
2398
2399 if ( (error = file_vnode(uap->fd, &vp)) )
2400 return (error);
2401
2402 error = vnode_getwithref(vp);
2403 if (error) {
2404 file_drop(uap->fd);
2405 return (error);
2406 }
2407
2408 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2409
2410 mp = vp->v_mount;
2411 if (!mp) {
2412 error = EBADF;
2413 goto out;
2414 }
2415 sp = &mp->mnt_vfsstat;
2416 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2417 goto out;
2418 }
2419
2420 error = statfs64_common(mp, sp, uap->buf);
2421
2422 out:
2423 file_drop(uap->fd);
2424 vnode_put(vp);
2425
2426 return (error);
2427 }
2428
2429 struct getfsstat_struct {
2430 user_addr_t sfsp;
2431 user_addr_t *mp;
2432 int count;
2433 int maxcount;
2434 int flags;
2435 int error;
2436 };
2437
2438
2439 static int
2440 getfsstat_callback(mount_t mp, void * arg)
2441 {
2442
2443 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2444 struct vfsstatfs *sp;
2445 int error, my_size;
2446 vfs_context_t ctx = vfs_context_current();
2447
2448 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2449 sp = &mp->mnt_vfsstat;
2450 /*
2451 * If MNT_NOWAIT is specified, do not refresh the
2452 * fsstat cache. MNT_WAIT/MNT_DWAIT override MNT_NOWAIT.
2453 */
2454 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2455 (error = vfs_update_vfsstat(mp, ctx,
2456 VFS_USER_EVENT))) {
2457 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2458 return(VFS_RETURNED);
2459 }
2460
2461 /*
2462 * Need to handle LP64 version of struct statfs
2463 */
2464 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2465 if (error) {
2466 fstp->error = error;
2467 return(VFS_RETURNED_DONE);
2468 }
2469 fstp->sfsp += my_size;
2470
2471 if (fstp->mp) {
2472 #if CONFIG_MACF
2473 error = mac_mount_label_get(mp, *fstp->mp);
2474 if (error) {
2475 fstp->error = error;
2476 return(VFS_RETURNED_DONE);
2477 }
2478 #endif
2479 fstp->mp++;
2480 }
2481 }
2482 fstp->count++;
2483 return(VFS_RETURNED);
2484 }
2485
2486 /*
2487 * Get statistics on all filesystems.
2488 */
2489 int
2490 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2491 {
2492 struct __mac_getfsstat_args muap;
2493
2494 muap.buf = uap->buf;
2495 muap.bufsize = uap->bufsize;
2496 muap.mac = USER_ADDR_NULL;
2497 muap.macsize = 0;
2498 muap.flags = uap->flags;
2499
2500 return (__mac_getfsstat(p, &muap, retval));
2501 }
2502
2503 /*
2504 * __mac_getfsstat: Get MAC-related file system statistics
2505 *
2506 * Parameters: p (ignored)
2507 * uap User argument descriptor (see below)
2508 * retval Count of file system statistics (N stats)
2509 *
2510 * Indirect: uap->bufsize Buffer size
2511 * uap->macsize MAC info size
2512 * uap->buf Buffer where information will be returned
2513 * uap->mac MAC info
2514 * uap->flags File system flags
2515 *
2516 *
2517 * Returns: 0 Success
2518 * !0 Not success
2519 *
2520 */
2521 int
2522 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2523 {
2524 user_addr_t sfsp;
2525 user_addr_t *mp;
2526 size_t count, maxcount, bufsize, macsize;
2527 struct getfsstat_struct fst;
2528
2529 bufsize = (size_t) uap->bufsize;
2530 macsize = (size_t) uap->macsize;
2531
2532 if (IS_64BIT_PROCESS(p)) {
2533 maxcount = bufsize / sizeof(struct user64_statfs);
2534 }
2535 else {
2536 maxcount = bufsize / sizeof(struct user32_statfs);
2537 }
2538 sfsp = uap->buf;
2539 count = 0;
2540
2541 mp = NULL;
2542
2543 #if CONFIG_MACF
2544 if (uap->mac != USER_ADDR_NULL) {
2545 u_int32_t *mp0;
2546 int error;
2547 unsigned int i;
2548
2549 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2550 if (count != maxcount)
2551 return (EINVAL);
2552
2553 /* Copy in the array */
2554 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2555 if (mp0 == NULL) {
2556 return (ENOMEM);
2557 }
2558
2559 error = copyin(uap->mac, mp0, macsize);
2560 if (error) {
2561 FREE(mp0, M_MACTEMP);
2562 return (error);
2563 }
2564
2565 /* Normalize to an array of user_addr_t */
2566 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2567 if (mp == NULL) {
2568 FREE(mp0, M_MACTEMP);
2569 return (ENOMEM);
2570 }
2571
2572 for (i = 0; i < count; i++) {
2573 if (IS_64BIT_PROCESS(p))
2574 mp[i] = ((user_addr_t *)mp0)[i];
2575 else
2576 mp[i] = (user_addr_t)mp0[i];
2577 }
2578 FREE(mp0, M_MACTEMP);
2579 }
2580 #endif
2581
2582
2583 fst.sfsp = sfsp;
2584 fst.mp = mp;
2585 fst.flags = uap->flags;
2586 fst.count = 0;
2587 fst.error = 0;
2588 fst.maxcount = maxcount;
2589
2590
2591 vfs_iterate(0, getfsstat_callback, &fst);
2592
2593 if (mp)
2594 FREE(mp, M_MACTEMP);
2595
2596 if (fst.error ) {
2597 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2598 return(fst.error);
2599 }
2600
2601 if (fst.sfsp && fst.count > fst.maxcount)
2602 *retval = fst.maxcount;
2603 else
2604 *retval = fst.count;
2605 return (0);
2606 }
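/*
 * A common userspace pattern for getfsstat(2), which the plain and MAC
 * variants above both serve: call once with a NULL buffer so the
 * callback only counts mounts, then call again with MNT_NOWAIT so the
 * cached vfsstatfs is returned without refreshing each filesystem:
 *
 *    #include <sys/param.h>
 *    #include <sys/mount.h>
 *    #include <stdio.h>
 *    #include <stdlib.h>
 *
 *    int i, n = getfsstat(NULL, 0, MNT_NOWAIT);    // count only
 *    struct statfs *buf;
 *
 *    if (n > 0 && (buf = calloc(n, sizeof(*buf))) != NULL) {
 *        n = getfsstat(buf, (int)(n * sizeof(*buf)), MNT_NOWAIT);
 *        for (i = 0; i < n; i++)
 *            printf("%s on %s (%s)\n", buf[i].f_mntfromname,
 *                buf[i].f_mntonname, buf[i].f_fstypename);
 *        free(buf);
 *    }
 */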
2607
2608 static int
2609 getfsstat64_callback(mount_t mp, void * arg)
2610 {
2611 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2612 struct vfsstatfs *sp;
2613 int error;
2614
2615 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2616 sp = &mp->mnt_vfsstat;
2617 /*
2618 * If MNT_NOWAIT is specified, do not refresh the fsstat
2619 * cache. MNT_WAIT overrides MNT_NOWAIT.
2620 *
2621 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2622 * getfsstat, since the constants are out of the same
2623 * namespace.
2624 */
2625 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2626 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2627 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2628 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2629 return(VFS_RETURNED);
2630 }
2631
2632 error = statfs64_common(mp, sp, fstp->sfsp);
2633 if (error) {
2634 fstp->error = error;
2635 return(VFS_RETURNED_DONE);
2636 }
2637 fstp->sfsp += sizeof(struct statfs64);
2638 }
2639 fstp->count++;
2640 return(VFS_RETURNED);
2641 }
2642
2643 /*
2644 * Get statistics on all file systems in 64 bit mode.
2645 */
2646 int
2647 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2648 {
2649 user_addr_t sfsp;
2650 int count, maxcount;
2651 struct getfsstat_struct fst;
2652
2653 maxcount = uap->bufsize / sizeof(struct statfs64);
2654
2655 sfsp = uap->buf;
2656 count = 0;
2657
2658 fst.sfsp = sfsp;
2659 fst.flags = uap->flags;
2660 fst.count = 0;
2661 fst.error = 0;
2662 fst.maxcount = maxcount;
2663
2664 vfs_iterate(0, getfsstat64_callback, &fst);
2665
2666 if (fst.error ) {
2667 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2668 return(fst.error);
2669 }
2670
2671 if (fst.sfsp && fst.count > fst.maxcount)
2672 *retval = fst.maxcount;
2673 else
2674 *retval = fst.count;
2675
2676 return (0);
2677 }
2678
2679 /*
2680 * Change current working directory to a given file descriptor.
2681 */
2682 /* ARGSUSED */
2683 static int
2684 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
2685 {
2686 struct filedesc *fdp = p->p_fd;
2687 vnode_t vp;
2688 vnode_t tdp;
2689 vnode_t tvp;
2690 struct mount *mp;
2691 int error;
2692 vfs_context_t ctx = vfs_context_current();
2693
2694 AUDIT_ARG(fd, uap->fd);
2695 if (per_thread && uap->fd == -1) {
2696 /*
2697 * Switching back from per-thread to per-process CWD; verify we
2698 * in fact have one before proceeding. The only success case
2699 * for this code path is to return 0 preemptively after zapping
2700 * the thread structure contents.
2701 */
2702 thread_t th = vfs_context_thread(ctx);
2703 if (th) {
2704 uthread_t uth = get_bsdthread_info(th);
2705 tvp = uth->uu_cdir;
2706 uth->uu_cdir = NULLVP;
2707 if (tvp != NULLVP) {
2708 vnode_rele(tvp);
2709 return (0);
2710 }
2711 }
2712 return (EBADF);
2713 }
2714
2715 if ( (error = file_vnode(uap->fd, &vp)) )
2716 return(error);
2717 if ( (error = vnode_getwithref(vp)) ) {
2718 file_drop(uap->fd);
2719 return(error);
2720 }
2721
2722 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2723
2724 if (vp->v_type != VDIR) {
2725 error = ENOTDIR;
2726 goto out;
2727 }
2728
2729 #if CONFIG_MACF
2730 error = mac_vnode_check_chdir(ctx, vp);
2731 if (error)
2732 goto out;
2733 #endif
2734 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2735 if (error)
2736 goto out;
2737
2738 while (!error && (mp = vp->v_mountedhere) != NULL) {
2739 if (vfs_busy(mp, LK_NOWAIT)) {
2740 error = EACCES;
2741 goto out;
2742 }
2743 error = VFS_ROOT(mp, &tdp, ctx);
2744 vfs_unbusy(mp);
2745 if (error)
2746 break;
2747 vnode_put(vp);
2748 vp = tdp;
2749 }
2750 if (error)
2751 goto out;
2752 if ( (error = vnode_ref(vp)) )
2753 goto out;
2754 vnode_put(vp);
2755
2756 if (per_thread) {
2757 thread_t th = vfs_context_thread(ctx);
2758 if (th) {
2759 uthread_t uth = get_bsdthread_info(th);
2760 tvp = uth->uu_cdir;
2761 uth->uu_cdir = vp;
2762 OSBitOrAtomic(P_THCWD, &p->p_flag);
2763 } else {
2764 vnode_rele(vp);
2765 return (ENOENT);
2766 }
2767 } else {
2768 proc_fdlock(p);
2769 tvp = fdp->fd_cdir;
2770 fdp->fd_cdir = vp;
2771 proc_fdunlock(p);
2772 }
2773
2774 if (tvp)
2775 vnode_rele(tvp);
2776 file_drop(uap->fd);
2777
2778 return (0);
2779 out:
2780 vnode_put(vp);
2781 file_drop(uap->fd);
2782
2783 return(error);
2784 }
2785
2786 int
2787 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2788 {
2789 return common_fchdir(p, uap, 0);
2790 }
2791
2792 int
2793 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2794 {
2795 return common_fchdir(p, (void *)uap, 1);
2796 }
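/*
 * A minimal userspace sketch of fchdir(2); the descriptor must refer to
 * a directory the caller can search, per the VDIR and KAUTH_VNODE_SEARCH
 * checks in common_fchdir() above (the path is illustrative):
 *
 *    #include <fcntl.h>
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    int dfd = open("/usr/local", O_RDONLY);
 *
 *    if (dfd == -1 || fchdir(dfd) == -1)
 *        perror("fchdir");
 *    if (dfd != -1)
 *        close(dfd);
 */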
2797
2798 /*
2799 * Change current working directory (".").
2800 *
2801 * Returns: 0 Success
2802 * change_dir:ENOTDIR
2803 * change_dir:???
2804 * vnode_ref:ENOENT No such file or directory
2805 */
2806 /* ARGSUSED */
2807 static int
2808 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
2809 {
2810 struct filedesc *fdp = p->p_fd;
2811 int error;
2812 struct nameidata nd;
2813 vnode_t tvp;
2814 vfs_context_t ctx = vfs_context_current();
2815
2816 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2817 UIO_USERSPACE, uap->path, ctx);
2818 error = change_dir(&nd, ctx);
2819 if (error)
2820 return (error);
2821 if ( (error = vnode_ref(nd.ni_vp)) ) {
2822 vnode_put(nd.ni_vp);
2823 return (error);
2824 }
2825 /*
2826 * drop the iocount we picked up in change_dir
2827 */
2828 vnode_put(nd.ni_vp);
2829
2830 if (per_thread) {
2831 thread_t th = vfs_context_thread(ctx);
2832 if (th) {
2833 uthread_t uth = get_bsdthread_info(th);
2834 tvp = uth->uu_cdir;
2835 uth->uu_cdir = nd.ni_vp;
2836 OSBitOrAtomic(P_THCWD, &p->p_flag);
2837 } else {
2838 vnode_rele(nd.ni_vp);
2839 return (ENOENT);
2840 }
2841 } else {
2842 proc_fdlock(p);
2843 tvp = fdp->fd_cdir;
2844 fdp->fd_cdir = nd.ni_vp;
2845 proc_fdunlock(p);
2846 }
2847
2848 if (tvp)
2849 vnode_rele(tvp);
2850
2851 return (0);
2852 }
2853
2854
2855 /*
2856 * chdir
2857 *
2858 * Change current working directory (".") for the entire process
2859 *
2860 * Parameters: p Process requesting the call
2861 * uap User argument descriptor (see below)
2862 * retval (ignored)
2863 *
2864 * Indirect parameters: uap->path Directory path
2865 *
2866 * Returns: 0 Success
2867 * common_chdir: ENOTDIR
2868 * common_chdir: ENOENT No such file or directory
2869 * common_chdir: ???
2870 *
2871 */
2872 int
2873 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2874 {
2875 return common_chdir(p, (void *)uap, 0);
2876 }
2877
2878 /*
2879 * __pthread_chdir
2880 *
2881 * Change current working directory (".") for a single thread
2882 *
2883 * Parameters: p Process requesting the call
2884 * uap User argument descriptor (see below)
2885 * retval (ignored)
2886 *
2887 * Indirect parameters: uap->path Directory path
2888 *
2889 * Returns: 0 Success
2890 * common_chdir: ENOTDIR
2891 * common_chdir: ENOENT No such file or directory
2892 * common_chdir: ???
2893 *
2894 */
2895 int
2896 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2897 {
2898 return common_chdir(p, (void *)uap, 1);
2899 }
2900
2901
2902 /*
2903 * Change notion of root (``/'') directory.
2904 */
2905 /* ARGSUSED */
2906 int
2907 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
2908 {
2909 struct filedesc *fdp = p->p_fd;
2910 int error;
2911 struct nameidata nd;
2912 vnode_t tvp;
2913 vfs_context_t ctx = vfs_context_current();
2914
2915 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
2916 return (error);
2917
2918 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2919 UIO_USERSPACE, uap->path, ctx);
2920 error = change_dir(&nd, ctx);
2921 if (error)
2922 return (error);
2923
2924 #if CONFIG_MACF
2925 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
2926 &nd.ni_cnd);
2927 if (error) {
2928 vnode_put(nd.ni_vp);
2929 return (error);
2930 }
2931 #endif
2932
2933 if ( (error = vnode_ref(nd.ni_vp)) ) {
2934 vnode_put(nd.ni_vp);
2935 return (error);
2936 }
2937 vnode_put(nd.ni_vp);
2938
2939 proc_fdlock(p);
2940 tvp = fdp->fd_rdir;
2941 fdp->fd_rdir = nd.ni_vp;
2942 fdp->fd_flags |= FD_CHROOT;
2943 proc_fdunlock(p);
2944
2945 if (tvp != NULL)
2946 vnode_rele(tvp);
2947
2948 return (0);
2949 }
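/*
 * A minimal sketch of chroot(2); the call requires superuser (see the
 * suser() check above), and the conventional follow-up chdir("/") keeps
 * the working directory inside the new root (the path is illustrative):
 *
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    if (chroot("/var/empty") == -1 || chdir("/") == -1)
 *        perror("chroot");
 */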
2950
2951 /*
2952 * Common routine for chroot and chdir.
2953 *
2954 * Returns: 0 Success
2955 * ENOTDIR Not a directory
2956 * namei:??? [anything namei can return]
2957 * vnode_authorize:??? [anything vnode_authorize can return]
2958 */
2959 static int
2960 change_dir(struct nameidata *ndp, vfs_context_t ctx)
2961 {
2962 vnode_t vp;
2963 int error;
2964
2965 if ((error = namei(ndp)))
2966 return (error);
2967 nameidone(ndp);
2968 vp = ndp->ni_vp;
2969
2970 if (vp->v_type != VDIR) {
2971 vnode_put(vp);
2972 return (ENOTDIR);
2973 }
2974
2975 #if CONFIG_MACF
2976 error = mac_vnode_check_chdir(ctx, vp);
2977 if (error) {
2978 vnode_put(vp);
2979 return (error);
2980 }
2981 #endif
2982
2983 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2984 if (error) {
2985 vnode_put(vp);
2986 return (error);
2987 }
2988
2989 return (error);
2990 }
2991
2992 /*
2993 * Check permissions, allocate an open file structure,
2994 * and call the device open routine if any.
2995 *
2996 * Returns: 0 Success
2997 * EINVAL
2998 * EINTR
2999 * falloc:ENFILE
3000 * falloc:EMFILE
3001 * falloc:ENOMEM
3002 * vn_open_auth:???
3003 * dupfdopen:???
3004 * VNOP_ADVLOCK:???
3005 * vnode_setsize:???
3006 *
3007 * XXX Need to implement uid, gid
3008 */
3009 int
3010 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3011 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3012 int32_t *retval)
3013 {
3014 proc_t p = vfs_context_proc(ctx);
3015 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3016 struct fileproc *fp;
3017 vnode_t vp;
3018 int flags, oflags;
3019 int type, indx, error;
3020 struct flock lf;
3021 int no_controlling_tty = 0;
3022 int deny_controlling_tty = 0;
3023 struct session *sessp = SESSION_NULL;
3024
3025 oflags = uflags;
3026
3027 if ((oflags & O_ACCMODE) == O_ACCMODE)
3028 return(EINVAL);
3029 flags = FFLAGS(uflags);
3030
3031 AUDIT_ARG(fflags, oflags);
3032 AUDIT_ARG(mode, vap->va_mode);
3033
3034 if ((error = falloc_withalloc(p,
3035 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3036 return (error);
3037 }
3038 uu->uu_dupfd = -indx - 1;
3039
3040 if (!(p->p_flag & P_CONTROLT)) {
3041 sessp = proc_session(p);
3042 no_controlling_tty = 1;
3043 /*
3044 * If conditions would warrant getting a controlling tty if
3045 * the device being opened is a tty (see ttyopen in tty.c),
3046 * but the open flags deny it, set a flag in the session to
3047 * prevent it.
3048 */
3049 if (SESS_LEADER(p, sessp) &&
3050 sessp->s_ttyvp == NULL &&
3051 (flags & O_NOCTTY)) {
3052 session_lock(sessp);
3053 sessp->s_flags |= S_NOCTTY;
3054 session_unlock(sessp);
3055 deny_controlling_tty = 1;
3056 }
3057 }
3058
3059 if ((error = vn_open_auth(ndp, &flags, vap))) {
3060 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
3061 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3062 fp_drop(p, indx, NULL, 0);
3063 *retval = indx;
3064 if (deny_controlling_tty) {
3065 session_lock(sessp);
3066 sessp->s_flags &= ~S_NOCTTY;
3067 session_unlock(sessp);
3068 }
3069 if (sessp != SESSION_NULL)
3070 session_rele(sessp);
3071 return (0);
3072 }
3073 }
3074 if (error == ERESTART)
3075 error = EINTR;
3076 fp_free(p, indx, fp);
3077
3078 if (deny_controlling_tty) {
3079 session_lock(sessp);
3080 sessp->s_flags &= ~S_NOCTTY;
3081 session_unlock(sessp);
3082 }
3083 if (sessp != SESSION_NULL)
3084 session_rele(sessp);
3085 return (error);
3086 }
3087 uu->uu_dupfd = 0;
3088 vp = ndp->ni_vp;
3089
3090 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
3091 fp->f_fglob->fg_ops = &vnops;
3092 fp->f_fglob->fg_data = (caddr_t)vp;
3093
3094 #if CONFIG_PROTECT
3095 if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
3096 if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
3097 fp->f_fglob->fg_flag |= FENCRYPTED;
3098 }
3099 }
3100 #endif
3101
3102 if (flags & (O_EXLOCK | O_SHLOCK)) {
3103 lf.l_whence = SEEK_SET;
3104 lf.l_start = 0;
3105 lf.l_len = 0;
3106 if (flags & O_EXLOCK)
3107 lf.l_type = F_WRLCK;
3108 else
3109 lf.l_type = F_RDLCK;
3110 type = F_FLOCK;
3111 if ((flags & FNONBLOCK) == 0)
3112 type |= F_WAIT;
3113 #if CONFIG_MACF
3114 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3115 F_SETLK, &lf);
3116 if (error)
3117 goto bad;
3118 #endif
3119 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
3120 goto bad;
3121 fp->f_fglob->fg_flag |= FHASLOCK;
3122 }
3123
3124 /* try to truncate by setting the size attribute */
3125 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3126 goto bad;
3127
3128 /*
3129 * If the open flags denied the acquisition of a controlling tty,
3130 * clear the flag in the session structure that prevented the lower
3131 * level code from assigning one.
3132 */
3133 if (deny_controlling_tty) {
3134 session_lock(sessp);
3135 sessp->s_flags &= ~S_NOCTTY;
3136 session_unlock(sessp);
3137 }
3138
3139 /*
3140 * If a controlling tty was set by the tty line discipline, then we
3141 * want to set the vp of the tty into the session structure. We have
3142 * a race here because we can't get to the vp for the tp in ttyopen,
3143 * because it's not passed as a parameter in the open path.
3144 */
3145 if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
3146 vnode_t ttyvp;
3147
3148 /*
3149 * We already have a ref from vn_open_auth(), so we can demand another reference.
3150 */
3151 error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
3152 if (error != 0) {
3153 panic("vnode_ref_ext() with VNODE_REF_FORCE failed?!");
3154 }
3155
3156 session_lock(sessp);
3157 ttyvp = sessp->s_ttyvp;
3158 sessp->s_ttyvp = vp;
3159 sessp->s_ttyvid = vnode_vid(vp);
3160 session_unlock(sessp);
3161 if (ttyvp != NULLVP)
3162 vnode_rele(ttyvp);
3163 }
3164
3165 vnode_put(vp);
3166
3167 proc_fdlock(p);
3168 if (flags & O_CLOEXEC)
3169 *fdflags(p, indx) |= UF_EXCLOSE;
3170 if (flags & O_CLOFORK)
3171 *fdflags(p, indx) |= UF_FORKCLOSE;
3172 procfdtbl_releasefd(p, indx, NULL);
3173 fp_drop(p, indx, fp, 1);
3174 proc_fdunlock(p);
3175
3176 *retval = indx;
3177
3178 if (sessp != SESSION_NULL)
3179 session_rele(sessp);
3180 return (0);
3181 bad:
3182 if (deny_controlling_tty) {
3183 session_lock(sessp);
3184 sessp->s_flags &= ~S_NOCTTY;
3185 session_unlock(sessp);
3186 }
3187 if (sessp != SESSION_NULL)
3188 session_rele(sessp);
3189
3190 struct vfs_context context = *vfs_context_current();
3191 context.vc_ucred = fp->f_fglob->fg_cred;
3192
3193 vn_close(vp, fp->f_fglob->fg_flag, &context);
3194 vnode_put(vp);
3195 fp_free(p, indx, fp);
3196
3197 return (error);
3198 }
3199
3200 /*
3201 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3202 *
3203 * Parameters: p Process requesting the open
3204 * uap User argument descriptor (see below)
3205 * retval Pointer to an area to receive the
3206 * return value from the system call
3207 *
3208 * Indirect: uap->path Path to open (same as 'open')
3209 * uap->flags Flags to open (same as 'open')
3210 * uap->uid UID to set, if creating
3211 * uap->gid GID to set, if creating
3212 * uap->mode File mode, if creating (same as 'open')
3213 * uap->xsecurity ACL to set, if creating
3214 *
3215 * Returns: 0 Success
3216 * !0 errno value
3217 *
3218 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3219 *
3220 * XXX: We should enumerate the possible errno values here, and where
3221 * in the code they originated.
3222 */
3223 int
3224 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3225 {
3226 struct filedesc *fdp = p->p_fd;
3227 int ciferror;
3228 kauth_filesec_t xsecdst;
3229 struct vnode_attr va;
3230 struct nameidata nd;
3231 int cmode;
3232
3233 AUDIT_ARG(owner, uap->uid, uap->gid);
3234
3235 xsecdst = NULL;
3236 if ((uap->xsecurity != USER_ADDR_NULL) &&
3237 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3238 return ciferror;
3239
3240 VATTR_INIT(&va);
3241 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3242 VATTR_SET(&va, va_mode, cmode);
3243 if (uap->uid != KAUTH_UID_NONE)
3244 VATTR_SET(&va, va_uid, uap->uid);
3245 if (uap->gid != KAUTH_GID_NONE)
3246 VATTR_SET(&va, va_gid, uap->gid);
3247 if (xsecdst != NULL)
3248 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3249
3250 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3251 uap->path, vfs_context_current());
3252
3253 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3254 fileproc_alloc_init, NULL, retval);
3255 if (xsecdst != NULL)
3256 kauth_filesec_free(xsecdst);
3257
3258 return ciferror;
3259 }
3260
3261 /*
3262 * Go through the data-protected atomically controlled open (2)
3263 *
3264 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3265 */
3266 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3267 int flags = uap->flags;
3268 int class = uap->class;
3269 int dpflags = uap->dpflags;
3270
3271 /*
3272 * Follow the same path as normal open(2)
3273 * Look up the item if it exists, and acquire the vnode.
3274 */
3275 struct filedesc *fdp = p->p_fd;
3276 struct vnode_attr va;
3277 struct nameidata nd;
3278 int cmode;
3279 int error;
3280
3281 VATTR_INIT(&va);
3282 /* Mask off all but regular access permissions */
3283 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3284 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3285
3286 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3287 uap->path, vfs_context_current());
3288
3289 /*
3290 * Initialize the extra fields in vnode_attr so we can pass our
3291 * additional information down to the filesystem:
3292 * 1. target cprotect class.
3293 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3294 */
3295 if (flags & O_CREAT) {
3296 VATTR_SET(&va, va_dataprotect_class, class);
3297 }
3298
3299 if (dpflags & O_DP_GETRAWENCRYPTED) {
3300 if ( flags & (O_RDWR | O_WRONLY)) {
3301 /* Not allowed to write raw encrypted bytes */
3302 return EINVAL;
3303 }
3304 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3305 }
3306
3307 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3308 fileproc_alloc_init, NULL, retval);
3309
3310 return error;
3311 }
3312
3313
3314 int
3315 open(proc_t p, struct open_args *uap, int32_t *retval)
3316 {
3317 __pthread_testcancel(1);
3318 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3319 }
3320
3321 int
3322 open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
3323 {
3324 struct filedesc *fdp = p->p_fd;
3325 struct vnode_attr va;
3326 struct nameidata nd;
3327 int cmode;
3328
3329 VATTR_INIT(&va);
3330 /* Mask off all but regular access permissions */
3331 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3332 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3333
3334 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3335 uap->path, vfs_context_current());
3336
3337 return (open1(vfs_context_current(), &nd, uap->flags, &va,
3338 fileproc_alloc_init, NULL, retval));
3339 }
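/*
 * In both open paths above, the requested creation mode is filtered
 * through the process umask (fd_cmask) and ACCESSPERMS before it
 * reaches the filesystem.  A small sketch of the visible effect (the
 * path is illustrative):
 *
 *    #include <fcntl.h>
 *    #include <sys/stat.h>
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    umask(022);    // request 0666 below, get 0644 on disk
 *    int fd = open("/tmp/example.txt", O_WRONLY | O_CREAT | O_EXCL, 0666);
 *
 *    if (fd == -1)
 *        perror("open");
 *    else
 *        close(fd);
 */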
3340
3341
3342 /*
3343 * Create a special file.
3344 */
3345 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3346
3347 int
3348 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3349 {
3350 struct vnode_attr va;
3351 vfs_context_t ctx = vfs_context_current();
3352 int error;
3353 struct nameidata nd;
3354 vnode_t vp, dvp;
3355
3356 VATTR_INIT(&va);
3357 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3358 VATTR_SET(&va, va_rdev, uap->dev);
3359
3360 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3361 if ((uap->mode & S_IFMT) == S_IFIFO)
3362 return(mkfifo1(ctx, uap->path, &va));
3363
3364 AUDIT_ARG(mode, uap->mode);
3365 AUDIT_ARG(value32, uap->dev);
3366
3367 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
3368 return (error);
3369 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
3370 UIO_USERSPACE, uap->path, ctx);
3371 error = namei(&nd);
3372 if (error)
3373 return (error);
3374 dvp = nd.ni_dvp;
3375 vp = nd.ni_vp;
3376
3377 if (vp != NULL) {
3378 error = EEXIST;
3379 goto out;
3380 }
3381
3382 switch (uap->mode & S_IFMT) {
3383 case S_IFMT: /* used by badsect to flag bad sectors */
3384 VATTR_SET(&va, va_type, VBAD);
3385 break;
3386 case S_IFCHR:
3387 VATTR_SET(&va, va_type, VCHR);
3388 break;
3389 case S_IFBLK:
3390 VATTR_SET(&va, va_type, VBLK);
3391 break;
3392 default:
3393 error = EINVAL;
3394 goto out;
3395 }
3396
3397 #if CONFIG_MACF
3398 error = mac_vnode_check_create(ctx,
3399 nd.ni_dvp, &nd.ni_cnd, &va);
3400 if (error)
3401 goto out;
3402 #endif
3403
3404 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3405 goto out;
3406
3407 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
3408 goto out;
3409
3410 if (vp) {
3411 int update_flags = 0;
3412
3413 // Make sure the name & parent pointers are hooked up
3414 if (vp->v_name == NULL)
3415 update_flags |= VNODE_UPDATE_NAME;
3416 if (vp->v_parent == NULLVP)
3417 update_flags |= VNODE_UPDATE_PARENT;
3418
3419 if (update_flags)
3420 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3421
3422 #if CONFIG_FSE
3423 add_fsevent(FSE_CREATE_FILE, ctx,
3424 FSE_ARG_VNODE, vp,
3425 FSE_ARG_DONE);
3426 #endif
3427 }
3428
3429 out:
3430 /*
3431 * nameidone has to happen before we vnode_put(dvp)
3432 * since it may need to release the fs_nodelock on the dvp
3433 */
3434 nameidone(&nd);
3435
3436 if (vp)
3437 vnode_put(vp);
3438 vnode_put(dvp);
3439
3440 return (error);
3441 }
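/*
 * A minimal sketch of mknod(2) for a character special file; it
 * requires superuser per the suser() check above, and a mode of
 * S_IFIFO is diverted to mkfifo1() instead.  The path and device
 * numbers are illustrative:
 *
 *    #include <sys/types.h>
 *    #include <sys/stat.h>
 *    #include <stdio.h>
 *
 *    if (mknod("/tmp/mychardev", S_IFCHR | 0600, makedev(3, 2)) == -1)
 *        perror("mknod");
 */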
3442
3443 /*
3444 * Create a named pipe.
3445 *
3446 * Returns: 0 Success
3447 * EEXIST
3448 * namei:???
3449 * vnode_authorize:???
3450 * vn_create:???
3451 */
3452 static int
3453 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
3454 {
3455 vnode_t vp, dvp;
3456 int error;
3457 struct nameidata nd;
3458
3459 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
3460 UIO_USERSPACE, upath, ctx);
3461 error = namei(&nd);
3462 if (error)
3463 return (error);
3464 dvp = nd.ni_dvp;
3465 vp = nd.ni_vp;
3466
3467 /* check that this is a new file and authorize addition */
3468 if (vp != NULL) {
3469 error = EEXIST;
3470 goto out;
3471 }
3472 VATTR_SET(vap, va_type, VFIFO);
3473
3474 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
3475 goto out;
3476
3477 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
3478 out:
3479 /*
3480 * nameidone has to happen before we vnode_put(dvp)
3481 * since it may need to release the fs_nodelock on the dvp
3482 */
3483 nameidone(&nd);
3484
3485 if (vp)
3486 vnode_put(vp);
3487 vnode_put(dvp);
3488
3489 return error;
3490 }
3491
3492
3493 /*
3494 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3495 *
3496 * Parameters: p Process requesting the open
3497 * uap User argument descriptor (see below)
3498 * retval (Ignored)
3499 *
3500 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3501 * uap->uid UID to set
3502 * uap->gid GID to set
3503 * uap->mode File mode to set (same as 'mkfifo')
3504 * uap->xsecurity ACL to set, if creating
3505 *
3506 * Returns: 0 Success
3507 * !0 errno value
3508 *
3509 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3510 *
3511 * XXX: We should enumerate the possible errno values here, and where
3512 * in the code they originated.
3513 */
3514 int
3515 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3516 {
3517 int ciferror;
3518 kauth_filesec_t xsecdst;
3519 struct vnode_attr va;
3520
3521 AUDIT_ARG(owner, uap->uid, uap->gid);
3522
3523 xsecdst = KAUTH_FILESEC_NONE;
3524 if (uap->xsecurity != USER_ADDR_NULL) {
3525 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3526 return ciferror;
3527 }
3528
3529 VATTR_INIT(&va);
3530 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3531 if (uap->uid != KAUTH_UID_NONE)
3532 VATTR_SET(&va, va_uid, uap->uid);
3533 if (uap->gid != KAUTH_GID_NONE)
3534 VATTR_SET(&va, va_gid, uap->gid);
3535 if (xsecdst != KAUTH_FILESEC_NONE)
3536 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3537
3538 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3539
3540 if (xsecdst != KAUTH_FILESEC_NONE)
3541 kauth_filesec_free(xsecdst);
3542 return ciferror;
3543 }
3544
3545 /* ARGSUSED */
3546 int
3547 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3548 {
3549 struct vnode_attr va;
3550
3551 VATTR_INIT(&va);
3552 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3553
3554 return(mkfifo1(vfs_context_current(), uap->path, &va));
3555 }
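/*
 * A minimal userspace sketch of mkfifo(2); as with open(2), the mode is
 * masked by the process umask (fd_cmask) above.  The path is
 * illustrative:
 *
 *    #include <sys/types.h>
 *    #include <sys/stat.h>
 *    #include <stdio.h>
 *
 *    if (mkfifo("/tmp/example.fifo", 0600) == -1)
 *        perror("mkfifo");
 */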
3556
3557
3558 static char *
3559 my_strrchr(char *p, int ch)
3560 {
3561 char *save;
3562
3563 for (save = NULL;; ++p) {
3564 if (*p == ch)
3565 save = p;
3566 if (!*p)
3567 return(save);
3568 }
3569 /* NOTREACHED */
3570 }
3571
3572 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3573
3574 int
3575 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
3576 {
3577 int ret, len = _len;
3578
3579 *truncated_path = 0;
3580 ret = vn_getpath(dvp, path, &len);
3581 if (ret == 0 && len < (MAXPATHLEN - 1)) {
3582 if (leafname) {
3583 path[len-1] = '/';
3584 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
3585 if (len > MAXPATHLEN) {
3586 char *ptr;
3587
3588 // the string got truncated!
3589 *truncated_path = 1;
3590 ptr = my_strrchr(path, '/');
3591 if (ptr) {
3592 *ptr = '\0'; // chop off the string at the last directory component
3593 }
3594 len = strlen(path) + 1;
3595 }
3596 }
3597 } else if (ret == 0) {
3598 *truncated_path = 1;
3599 } else if (ret != 0) {
3600 struct vnode *mydvp=dvp;
3601
3602 if (ret != ENOSPC) {
3603 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
3604 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
3605 }
3606 *truncated_path = 1;
3607
3608 do {
3609 if (mydvp->v_parent != NULL) {
3610 mydvp = mydvp->v_parent;
3611 } else if (mydvp->v_mount) {
3612 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
3613 break;
3614 } else {
3615 // no parent and no mount point? only thing is to punt and say "/" changed
3616 strlcpy(path, "/", _len);
3617 len = 2;
3618 mydvp = NULL;
3619 }
3620
3621 if (mydvp == NULL) {
3622 break;
3623 }
3624
3625 len = _len;
3626 ret = vn_getpath(mydvp, path, &len);
3627 } while (ret == ENOSPC);
3628 }
3629
3630 return len;
3631 }
3632
3633
3634 /*
3635 * Make a hard file link.
3636 *
3637 * Returns: 0 Success
3638 * EPERM
3639 * EEXIST
3640 * EXDEV
3641 * namei:???
3642 * vnode_authorize:???
3643 * VNOP_LINK:???
3644 */
3645 /* ARGSUSED */
3646 int
3647 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
3648 {
3649 vnode_t vp, dvp, lvp;
3650 struct nameidata nd;
3651 vfs_context_t ctx = vfs_context_current();
3652 int error;
3653 #if CONFIG_FSE
3654 fse_info finfo;
3655 #endif
3656 int need_event, has_listeners;
3657 char *target_path = NULL;
3658 int truncated=0;
3659
3660 vp = dvp = lvp = NULLVP;
3661
3662 /* look up the object we are linking to */
3663 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
3664 UIO_USERSPACE, uap->path, ctx);
3665 error = namei(&nd);
3666 if (error)
3667 return (error);
3668 vp = nd.ni_vp;
3669
3670 nameidone(&nd);
3671
3672 /*
3673 * Normally, linking to directories is not supported.
3674 * However, some file systems may have limited support.
3675 */
3676 if (vp->v_type == VDIR) {
3677 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
3678 error = EPERM; /* POSIX */
3679 goto out;
3680 }
3681 /* Linking to a directory requires ownership. */
3682 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
3683 struct vnode_attr dva;
3684
3685 VATTR_INIT(&dva);
3686 VATTR_WANTED(&dva, va_uid);
3687 if (vnode_getattr(vp, &dva, ctx) != 0 ||
3688 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
3689 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
3690 error = EACCES;
3691 goto out;
3692 }
3693 }
3694 }
3695
3696 /* lookup the target node */
3697 #if CONFIG_TRIGGERS
3698 nd.ni_op = OP_LINK;
3699 #endif
3700 nd.ni_cnd.cn_nameiop = CREATE;
3701 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
3702 nd.ni_dirp = uap->link;
3703 error = namei(&nd);
3704 if (error != 0)
3705 goto out;
3706 dvp = nd.ni_dvp;
3707 lvp = nd.ni_vp;
3708
3709 #if CONFIG_MACF
3710 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
3711 goto out2;
3712 #endif
3713
3714 /* or to anything that kauth doesn't want us to link to (e.g. immutable items) */
3715 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
3716 goto out2;
3717
3718 /* target node must not exist */
3719 if (lvp != NULLVP) {
3720 error = EEXIST;
3721 goto out2;
3722 }
3723 /* cannot link across mountpoints */
3724 if (vnode_mount(vp) != vnode_mount(dvp)) {
3725 error = EXDEV;
3726 goto out2;
3727 }
3728
3729 /* authorize creation of the target node */
3730 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3731 goto out2;
3732
3733 /* and finally make the link */
3734 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
3735 if (error)
3736 goto out2;
3737
3738 #if CONFIG_MACF
3739 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
3740 #endif
3741
3742 #if CONFIG_FSE
3743 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
3744 #else
3745 need_event = 0;
3746 #endif
3747 has_listeners = kauth_authorize_fileop_has_listeners();
3748
3749 if (need_event || has_listeners) {
3750 char *link_to_path = NULL;
3751 int len, link_name_len;
3752
3753 /* build the path to the new link file */
3754 GET_PATH(target_path);
3755 if (target_path == NULL) {
3756 error = ENOMEM;
3757 goto out2;
3758 }
3759
3760 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
3761
3762 if (has_listeners) {
3763 /* build the path to file we are linking to */
3764 GET_PATH(link_to_path);
3765 if (link_to_path == NULL) {
3766 error = ENOMEM;
3767 goto out2;
3768 }
3769
3770 link_name_len = MAXPATHLEN;
3771 vn_getpath(vp, link_to_path, &link_name_len);
3772
3773 /*
3774 * Call out to allow 3rd party notification of the link creation.
3775 * Ignore result of kauth_authorize_fileop call.
3776 */
3777 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
3778 (uintptr_t)link_to_path, (uintptr_t)target_path);
3779 if (link_to_path != NULL) {
3780 RELEASE_PATH(link_to_path);
3781 }
3782 }
3783 #if CONFIG_FSE
3784 if (need_event) {
3785 /* construct fsevent */
3786 if (get_fse_info(vp, &finfo, ctx) == 0) {
3787 if (truncated) {
3788 finfo.mode |= FSE_TRUNCATED_PATH;
3789 }
3790
3791 // build the path to the destination of the link
3792 add_fsevent(FSE_CREATE_FILE, ctx,
3793 FSE_ARG_STRING, len, target_path,
3794 FSE_ARG_FINFO, &finfo,
3795 FSE_ARG_DONE);
3796 }
3797 if (vp->v_parent) {
3798 add_fsevent(FSE_STAT_CHANGED, ctx,
3799 FSE_ARG_VNODE, vp->v_parent,
3800 FSE_ARG_DONE);
3801 }
3802 }
3803 #endif
3804 }
3805 out2:
3806 /*
3807 * nameidone has to happen before we vnode_put(dvp)
3808 * since it may need to release the fs_nodelock on the dvp
3809 */
3810 nameidone(&nd);
3811 if (target_path != NULL) {
3812 RELEASE_PATH(target_path);
3813 }
3814 out:
3815 if (lvp)
3816 vnode_put(lvp);
3817 if (dvp)
3818 vnode_put(dvp);
3819 vnode_put(vp);
3820 return (error);
3821 }
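/*
 * A minimal sketch of link(2); both names must live on the same mounted
 * filesystem (the vnode_mount() comparison above returns EXDEV
 * otherwise), and the new name must not already exist (EEXIST).  Paths
 * are illustrative:
 *
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    if (link("/tmp/original.txt", "/tmp/hardlink.txt") == -1)
 *        perror("link");
 */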
3822
3823 /*
3824 * Make a symbolic link.
3825 *
3826 * We could add support for ACLs here too...
3827 */
3828 /* ARGSUSED */
3829 int
3830 symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
3831 {
3832 struct vnode_attr va;
3833 char *path;
3834 int error;
3835 struct nameidata nd;
3836 vfs_context_t ctx = vfs_context_current();
3837 vnode_t vp, dvp;
3838 size_t dummy=0;
3839
3840 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
3841 error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
3842 if (error)
3843 goto out;
3844 AUDIT_ARG(text, path); /* This is the link string */
3845
3846 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
3847 UIO_USERSPACE, uap->link, ctx);
3848 error = namei(&nd);
3849 if (error)
3850 goto out;
3851 dvp = nd.ni_dvp;
3852 vp = nd.ni_vp;
3853
3854 VATTR_INIT(&va);
3855 VATTR_SET(&va, va_type, VLNK);
3856 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
3857 #if CONFIG_MACF
3858 error = mac_vnode_check_create(ctx,
3859 dvp, &nd.ni_cnd, &va);
3860 #endif
3861 if (error != 0) {
3862 goto skipit;
3863 }
3864
3865 if (vp != NULL) {
3866 error = EEXIST;
3867 goto skipit;
3868 }
3869
3870 /* authorize */
3871 if (error == 0)
3872 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
3873 /* get default ownership, etc. */
3874 if (error == 0)
3875 error = vnode_authattr_new(dvp, &va, 0, ctx);
3876 if (error == 0)
3877 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
3878
3879 #if CONFIG_MACF
3880 if (error == 0)
3881 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
3882 #endif
3883
3884 /* do fallback attribute handling */
3885 if (error == 0)
3886 error = vnode_setattr_fallback(vp, &va, ctx);
3887
3888 if (error == 0) {
3889 int update_flags = 0;
3890
3891 if (vp == NULL) {
3892 nd.ni_cnd.cn_nameiop = LOOKUP;
3893 #if CONFIG_TRIGGERS
3894 nd.ni_op = OP_LOOKUP;
3895 #endif
3896 nd.ni_cnd.cn_flags = 0;
3897 error = namei(&nd);
3898 vp = nd.ni_vp;
3899
3900 if (vp == NULL)
3901 goto skipit;
3902 }
3903
3904 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
3905 /* call out to allow 3rd party notification of the symlink creation.
3906 * Ignore result of kauth_authorize_fileop call.
3907 */
3908 if (kauth_authorize_fileop_has_listeners() &&
3909 namei(&nd) == 0) {
3910 char *new_link_path = NULL;
3911 int len;
3912
3913 /* build the path to the new link file */
3914 new_link_path = get_pathbuff();
3915 len = MAXPATHLEN;
3916 vn_getpath(dvp, new_link_path, &len);
3917 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
3918 new_link_path[len - 1] = '/';
3919 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
3920 }
3921
3922 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
3923 (uintptr_t)path, (uintptr_t)new_link_path);
3924 if (new_link_path != NULL)
3925 release_pathbuff(new_link_path);
3926 }
3927 #endif
3928 // Make sure the name & parent pointers are hooked up
3929 if (vp->v_name == NULL)
3930 update_flags |= VNODE_UPDATE_NAME;
3931 if (vp->v_parent == NULLVP)
3932 update_flags |= VNODE_UPDATE_PARENT;
3933
3934 if (update_flags)
3935 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3936
3937 #if CONFIG_FSE
3938 add_fsevent(FSE_CREATE_FILE, ctx,
3939 FSE_ARG_VNODE, vp,
3940 FSE_ARG_DONE);
3941 #endif
3942 }
3943
3944 skipit:
3945 /*
3946 * nameidone has to happen before we vnode_put(dvp)
3947 * since it may need to release the fs_nodelock on the dvp
3948 */
3949 nameidone(&nd);
3950
3951 if (vp)
3952 vnode_put(vp);
3953 vnode_put(dvp);
3954 out:
3955 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
3956
3957 return (error);
3958 }
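/*
 * A minimal sketch of symlink(2) paired with readlink(2); the link
 * target string is exactly the path copied in above and is not resolved
 * at creation time.  Paths are illustrative:
 *
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    char buf[1024];
 *    ssize_t n;
 *
 *    if (symlink("/etc/hosts", "/tmp/hosts.link") == -1)
 *        perror("symlink");
 *    else if ((n = readlink("/tmp/hosts.link", buf, sizeof(buf) - 1)) != -1) {
 *        buf[n] = '\0';
 *        printf("-> %s\n", buf);
 *    }
 */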
3959
3960 /*
3961 * Delete a whiteout from the filesystem.
3962 * XXX authorization not implemented for whiteouts
3963 */
3964 int
3965 undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
3966 {
3967 int error;
3968 struct nameidata nd;
3969 vfs_context_t ctx = vfs_context_current();
3970 vnode_t vp, dvp;
3971
3972 NDINIT(&nd, DELETE, OP_UNLINK, LOCKPARENT | DOWHITEOUT | AUDITVNPATH1,
3973 UIO_USERSPACE, uap->path, ctx);
3974 error = namei(&nd);
3975 if (error)
3976 return (error);
3977 dvp = nd.ni_dvp;
3978 vp = nd.ni_vp;
3979
3980 if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
3981 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
3982 } else
3983 error = EEXIST;
3984
3985 /*
3986 * nameidone has to happen before we vnode_put(dvp)
3987 * since it may need to release the fs_nodelock on the dvp
3988 */
3989 nameidone(&nd);
3990
3991 if (vp)
3992 vnode_put(vp);
3993 vnode_put(dvp);
3994
3995 return (error);
3996 }
3997
3998
3999 /*
4000 * Delete a name from the filesystem.
4001 */
4002 /* ARGSUSED */
4003 int
4004 unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags)
4005 {
4006 vnode_t vp, dvp;
4007 int error;
4008 struct componentname *cnp;
4009 char *path = NULL;
4010 int len=0;
4011 #if CONFIG_FSE
4012 fse_info finfo;
4013 struct vnode_attr va;
4014 #endif
4015 int flags = 0;
4016 int need_event = 0;
4017 int has_listeners = 0;
4018 int truncated_path=0;
4019 int batched;
4020 struct vnode_attr *vap = NULL;
4021
4022 #if NAMEDRSRCFORK
4023 /* unlink or delete is allowed on rsrc forks and named streams */
4024 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4025 #endif
4026
4027 ndp->ni_cnd.cn_flags |= LOCKPARENT;
4028 ndp->ni_flag |= NAMEI_COMPOUNDREMOVE;
4029 cnp = &ndp->ni_cnd;
4030
4031 lookup_continue:
4032 error = namei(ndp);
4033 if (error)
4034 return (error);
4035
4036 dvp = ndp->ni_dvp;
4037 vp = ndp->ni_vp;
4038
4039
4040 /* With Carbon delete semantics, busy files cannot be deleted */
4041 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4042 flags |= VNODE_REMOVE_NODELETEBUSY;
4043 }
4044
4045 /* Skip any potential upcalls if told to. */
4046 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4047 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4048 }
4049
4050 if (vp) {
4051 batched = vnode_compound_remove_available(vp);
4052 /*
4053 * The root of a mounted filesystem cannot be deleted.
4054 */
4055 if (vp->v_flag & VROOT) {
4056 error = EBUSY;
4057 }
4058
4059 if (!batched) {
4060 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4061 if (error) {
4062 goto out;
4063 }
4064 }
4065 } else {
4066 batched = 1;
4067
4068 if (!vnode_compound_remove_available(dvp)) {
4069 panic("No vp, but no compound remove?");
4070 }
4071 }
4072
4073 #if CONFIG_FSE
4074 need_event = need_fsevent(FSE_DELETE, dvp);
4075 if (need_event) {
4076 if (!batched) {
4077 if ((vp->v_flag & VISHARDLINK) == 0) {
4078 /* XXX need to get these data in batched VNOP */
4079 get_fse_info(vp, &finfo, ctx);
4080 }
4081 } else {
4082 error = vfs_get_notify_attributes(&va);
4083 if (error) {
4084 goto out;
4085 }
4086
4087 vap = &va;
4088 }
4089 }
4090 #endif
4091 has_listeners = kauth_authorize_fileop_has_listeners();
4092 if (need_event || has_listeners) {
4093 if (path == NULL) {
4094 GET_PATH(path);
4095 if (path == NULL) {
4096 error = ENOMEM;
4097 goto out;
4098 }
4099 }
4100 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4101 }
4102
4103 #if NAMEDRSRCFORK
4104 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4105 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4106 else
4107 #endif
4108 {
4109 error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx);
4110 vp = ndp->ni_vp;
4111 if (error == EKEEPLOOKING) {
4112 if (!batched) {
4113 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4114 }
4115
4116 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
4117 panic("EKEEPLOOKING, but continue flag not set?");
4118 }
4119
4120 if (vnode_isdir(vp)) {
4121 error = EISDIR;
4122 goto out;
4123 }
4124 goto lookup_continue;
4125 }
4126 }
4127
4128 /*
4129 * Call out to allow 3rd party notification of delete.
4130 * Ignore result of kauth_authorize_fileop call.
4131 */
4132 if (!error) {
4133 if (has_listeners) {
4134 kauth_authorize_fileop(vfs_context_ucred(ctx),
4135 KAUTH_FILEOP_DELETE,
4136 (uintptr_t)vp,
4137 (uintptr_t)path);
4138 }
4139
4140 if (vp->v_flag & VISHARDLINK) {
4141 //
4142 // if a hardlink gets deleted we want to blow away the
4143 // v_parent link because the path that got us to this
4144 // instance of the link is no longer valid. this will
4145 // force the next call to get the path to ask the file
4146 // system instead of just following the v_parent link.
4147 //
4148 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4149 }
4150
4151 #if CONFIG_FSE
4152 if (need_event) {
4153 if (vp->v_flag & VISHARDLINK) {
4154 get_fse_info(vp, &finfo, ctx);
4155 } else if (vap) {
4156 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4157 }
4158 if (truncated_path) {
4159 finfo.mode |= FSE_TRUNCATED_PATH;
4160 }
4161 add_fsevent(FSE_DELETE, ctx,
4162 FSE_ARG_STRING, len, path,
4163 FSE_ARG_FINFO, &finfo,
4164 FSE_ARG_DONE);
4165 }
4166 #endif
4167 }
4168
4169 out:
4170 if (path != NULL)
4171 RELEASE_PATH(path);
4172
4173 #if NAMEDRSRCFORK
4174 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4175 * will cause its shadow file to go away if necessary.
4176 */
4177 if (vp && (vnode_isnamedstream(vp)) &&
4178 (vp->v_parent != NULLVP) &&
4179 vnode_isshadow(vp)) {
4180 vnode_recycle(vp);
4181 }
4182 #endif
4183 /*
4184 * nameidone has to happen before we vnode_put(dvp)
4185 * since it may need to release the fs_nodelock on the dvp
4186 */
4187 nameidone(ndp);
4188 vnode_put(dvp);
4189 if (vp) {
4190 vnode_put(vp);
4191 }
4192 return (error);
4193 }
4194
4195 /*
4196 * Delete a name from the filesystem using POSIX semantics.
4197 */
4198 int
4199 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4200 {
4201 struct nameidata nd;
4202 vfs_context_t ctx = vfs_context_current();
4203
4204 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4205 uap->path, ctx);
4206 return unlink1(ctx, &nd, 0);
4207 }
4208
4209 /*
4210 * Delete a name from the filesystem using Carbon semantics.
4211 */
4212 int
4213 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4214 {
4215 struct nameidata nd;
4216 vfs_context_t ctx = vfs_context_current();
4217
4218 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4219 uap->path, ctx);
4220 return unlink1(ctx, &nd, VNODE_REMOVE_NODELETEBUSY);
4221 }
4222
4223 /*
4224 * Reposition read/write file offset.
4225 */
4226 int
4227 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
4228 {
4229 struct fileproc *fp;
4230 vnode_t vp;
4231 struct vfs_context *ctx;
4232 off_t offset = uap->offset, file_size;
4233 int error;
4234
4235 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4236 if (error == ENOTSUP)
4237 return (ESPIPE);
4238 return (error);
4239 }
4240 if (vnode_isfifo(vp)) {
4241 file_drop(uap->fd);
4242 return(ESPIPE);
4243 }
4244
4245
4246 ctx = vfs_context_current();
4247 #if CONFIG_MACF
4248 if (uap->whence == L_INCR && uap->offset == 0)
4249 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4250 fp->f_fglob);
4251 else
4252 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4253 fp->f_fglob);
4254 if (error) {
4255 file_drop(uap->fd);
4256 return (error);
4257 }
4258 #endif
4259 if ( (error = vnode_getwithref(vp)) ) {
4260 file_drop(uap->fd);
4261 return(error);
4262 }
4263
4264 switch (uap->whence) {
4265 case L_INCR:
4266 offset += fp->f_fglob->fg_offset;
4267 break;
4268 case L_XTND:
4269 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
4270 break;
4271 offset += file_size;
4272 break;
4273 case L_SET:
4274 break;
4275 default:
4276 error = EINVAL;
4277 }
4278 if (error == 0) {
4279 if (uap->offset > 0 && offset < 0) {
4280 /* Incremented/relative move past max size */
4281 error = EOVERFLOW;
4282 } else {
4283 /*
4284 * Allow negative offsets on character devices, per
4285 * POSIX 1003.1-2001. Most likely for writing disk
4286 * labels.
4287 */
4288 if (offset < 0 && vp->v_type != VCHR) {
4289 /* Decremented/relative move before start */
4290 error = EINVAL;
4291 } else {
4292 /* Success */
4293 fp->f_fglob->fg_offset = offset;
4294 *retval = fp->f_fglob->fg_offset;
4295 }
4296 }
4297 }
4298
4299 /*
4300 * An lseek can affect whether data is "available to read." Use
4301 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4302 */
4303 post_event_if_success(vp, error, NOTE_NONE);
4304 (void)vnode_put(vp);
4305 file_drop(uap->fd);
4306 return (error);
4307 }
4308
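/*
 * Editorial sketch (not part of the original xnu source): the whence
 * handling above mirrors the userspace SEEK_SET/SEEK_CUR/SEEK_END
 * semantics.  A minimal userspace illustration, assuming a readable
 * file at the hypothetical path "/tmp/example":
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tmp/example", O_RDONLY);
	if (fd < 0)
		return 1;

	off_t end = lseek(fd, 0, SEEK_END);	/* L_XTND: offset relative to file size */
	off_t cur = lseek(fd, 0, SEEK_CUR);	/* L_INCR: offset relative to fg_offset */
	off_t set = lseek(fd, 0, SEEK_SET);	/* L_SET: absolute offset */

	printf("end=%lld cur=%lld set=%lld\n",
	    (long long)end, (long long)cur, (long long)set);
	close(fd);
	return 0;
}
#endif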
4309
4310 /*
4311 * Check access permissions.
4312 *
4313 * Returns: 0 Success
4314 * vnode_authorize:???
4315 */
4316 static int
4317 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4318 {
4319 kauth_action_t action;
4320 int error;
4321
4322 /*
4323 * If just the regular access bits, convert them to something
4324 * that vnode_authorize will understand.
4325 */
4326 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4327 action = 0;
4328 if (uflags & R_OK)
4329 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4330 if (uflags & W_OK) {
4331 if (vnode_isdir(vp)) {
4332 action |= KAUTH_VNODE_ADD_FILE |
4333 KAUTH_VNODE_ADD_SUBDIRECTORY;
4334 /* might want delete rights here too */
4335 } else {
4336 action |= KAUTH_VNODE_WRITE_DATA;
4337 }
4338 }
4339 if (uflags & X_OK) {
4340 if (vnode_isdir(vp)) {
4341 action |= KAUTH_VNODE_SEARCH;
4342 } else {
4343 action |= KAUTH_VNODE_EXECUTE;
4344 }
4345 }
4346 } else {
4347 /* take advantage of definition of uflags */
4348 action = uflags >> 8;
4349 }
4350
4351 #if CONFIG_MACF
4352 error = mac_vnode_check_access(ctx, vp, uflags);
4353 if (error)
4354 return (error);
4355 #endif /* MAC */
4356
4357 /* action == 0 means only check for existence */
4358 if (action != 0) {
4359 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4360 } else {
4361 error = 0;
4362 }
4363
4364 return(error);
4365 }
4366
4367
4368
4369 /*
4370 * access_extended: Check access permissions in bulk.
4371 *
4372 * Description: uap->entries Pointer to an array of accessx
4373 * descriptor structs, plus one or
4374 * more NULL terminated strings (see
4375 * "Notes" section below).
4376 * uap->size Size of the area pointed to by
4377 * uap->entries.
4378 * uap->results Pointer to the results array.
4379 *
4380 * Returns: 0 Success
4381 * ENOMEM Insufficient memory
4382 * EINVAL Invalid arguments
4383 * namei:EFAULT Bad address
4384 * namei:ENAMETOOLONG Filename too long
4385 * namei:ENOENT No such file or directory
4386 * namei:ELOOP Too many levels of symbolic links
4387 * namei:EBADF Bad file descriptor
4388 * namei:ENOTDIR Not a directory
4389 * namei:???
4390 * access1:
4391 *
4392 * Implicit returns:
4393 * uap->results Array contents modified
4394 *
4395 * Notes: The uap->entries are structured as an arbitrary length array
4396 * of accessx descriptors, followed by one or more NULL terminated
4397 * strings
4398 *
4399 * struct accessx_descriptor[0]
4400 * ...
4401 * struct accessx_descriptor[n]
4402 * char name_data[0];
4403 *
4404 * We determine the entry count by walking the buffer containing
4405 * the uap->entries argument descriptor. For each descriptor we
4406 * see, the valid values for the offset ad_name_offset will be
4407 * in the byte range:
4408 *
4409 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4410 * to
4411 * [ uap->entries + uap->size - 2 ]
4412 *
4413 * since we must have at least one string, and the string must
4414 * be at least one character plus the NULL terminator in length.
4415 *
4416 * XXX: Need to support the check-as uid argument
4417 */
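/*
 * Editorial illustration (not part of the original source): a request
 * containing two descriptors that share one path and one descriptor
 * with its own path could be laid out as follows, with ad_name_offset
 * measured in bytes from the start of uap->entries and an offset of 0
 * meaning "reuse the previous descriptor's path":
 *
 *	descriptor[0]	ad_name_offset = 3 * sizeof(struct accessx_descriptor)
 *	descriptor[1]	ad_name_offset = 0   (same file as descriptor[0])
 *	descriptor[2]	ad_name_offset = descriptor[0].ad_name_offset + strlen("/tmp/a") + 1
 *	"/tmp/a\0"
 *	"/tmp/b\0"
 *
 * The paths "/tmp/a" and "/tmp/b" are hypothetical.
 */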
4418 int
4419 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
4420 {
4421 struct accessx_descriptor *input = NULL;
4422 errno_t *result = NULL;
4423 errno_t error = 0;
4424 int wantdelete = 0;
4425 unsigned int desc_max, desc_actual, i, j;
4426 struct vfs_context context;
4427 struct nameidata nd;
4428 int niopts;
4429 vnode_t vp = NULL;
4430 vnode_t dvp = NULL;
4431 #define ACCESSX_MAX_DESCR_ON_STACK 10
4432 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
4433
4434 context.vc_ucred = NULL;
4435
4436 /*
4437 * Validate parameters; if valid, copy the descriptor array and string
4438 * arguments into local memory. Before proceeding, the following
4439 * conditions must have been met:
4440 *
4441 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4442 * o There must be sufficient room in the request for at least one
4443 * descriptor and a one byte NUL terminated string.
4444 * o The allocation of local storage must not fail.
4445 */
4446 if (uap->size > ACCESSX_MAX_TABLESIZE)
4447 return(ENOMEM);
4448 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
4449 return(EINVAL);
4450 if (uap->size <= sizeof (stack_input)) {
4451 input = stack_input;
4452 } else {
4453 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
4454 if (input == NULL) {
4455 error = ENOMEM;
4456 goto out;
4457 }
4458 }
4459 error = copyin(uap->entries, input, uap->size);
4460 if (error)
4461 goto out;
4462
4463 AUDIT_ARG(opaque, input, uap->size);
4464
4465 /*
4466 * Force NUL termination of the copyin buffer to avoid namei() running
4467 * off the end. If the caller passes us bogus data, they may get a
4468 * bogus result.
4469 */
4470 ((char *)input)[uap->size - 1] = 0;
4471
4472 /*
4473 * Access is defined as checking against the process' real identity,
4474 * even if operations are checking the effective identity. This
4475 * requires that we use a local vfs context.
4476 */
4477 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4478 context.vc_thread = current_thread();
4479
4480 /*
4481 * Find out how many entries we have, so we can allocate the result
4482 * array by walking the list and adjusting the count downward by the
4483 * earliest string offset we see.
4484 */
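/*
 * Illustrative arithmetic (editorial, hypothetical numbers): with
 * uap->size = 100 and sizeof(struct accessx_descriptor) = 16, desc_max
 * is (100 - 2) / 16 = 6.  If the earliest non-zero ad_name_offset seen
 * converts to index j = 2, desc_actual is adjusted down from 6 to 2.
 */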
4485 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
4486 desc_actual = desc_max;
4487 for (i = 0; i < desc_actual; i++) {
4488 /*
4489 * Take the offset to the name string for this entry and
4490 * convert to an input array index, which would be one off
4491 * the end of the array if this entry was the lowest-addressed
4492 * name string.
4493 */
4494 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
4495
4496 /*
4497 * An offset greater than the max allowable offset is an error.
4498 * It is also an error for any valid entry to point
4499 * to a location prior to the end of the current entry, if
4500 * it's not a reference to the string of the previous entry.
4501 */
4502 if (j > desc_max || (j != 0 && j <= i)) {
4503 error = EINVAL;
4504 goto out;
4505 }
4506
4507 /*
4508 * An offset of 0 means use the previous descriptor's offset;
4509 * this is used to chain multiple requests for the same file
4510 * to avoid multiple lookups.
4511 */
4512 if (j == 0) {
4513 /* This is not valid for the first entry */
4514 if (i == 0) {
4515 error = EINVAL;
4516 goto out;
4517 }
4518 continue;
4519 }
4520
4521 /*
4522 * If the offset of the string for this descriptor is before
4523 * what we believe is the current actual last descriptor,
4524 * then we need to adjust our estimate downward; this permits
4525 * the string table following the last descriptor to be out
4526 * of order relative to the descriptor list.
4527 */
4528 if (j < desc_actual)
4529 desc_actual = j;
4530 }
4531
4532 /*
4533 * We limit the actual number of descriptors we are willing to process
4534 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
4535 * requested exceeds this limit, the request fails with ENOMEM.
4536 */
4537 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
4538 error = ENOMEM;
4539 goto out;
4540 }
4541 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
4542 if (result == NULL) {
4543 error = ENOMEM;
4544 goto out;
4545 }
4546
4547 /*
4548 * Do the work by iterating over the descriptor entries we know to
4549 * at least appear to contain valid data.
4550 */
4551 error = 0;
4552 for (i = 0; i < desc_actual; i++) {
4553 /*
4554 * If the ad_name_offset is 0, then we use the previous
4555 * results to make the check; otherwise, we are looking up
4556 * a new file name.
4557 */
4558 if (input[i].ad_name_offset != 0) {
4559 /* discard old vnodes */
4560 if (vp) {
4561 vnode_put(vp);
4562 vp = NULL;
4563 }
4564 if (dvp) {
4565 vnode_put(dvp);
4566 dvp = NULL;
4567 }
4568
4569 /*
4570 * Scan forward in the descriptor list to see if we
4571 * need the parent vnode. We will need it if we are
4572 * deleting, since we must have rights to remove
4573 * entries in the parent directory, as well as the
4574 * rights to delete the object itself.
4575 */
4576 wantdelete = input[i].ad_flags & _DELETE_OK;
4577 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
4578 if (input[j].ad_flags & _DELETE_OK)
4579 wantdelete = 1;
4580
4581 niopts = FOLLOW | AUDITVNPATH1;
4582
4583 /* need parent for vnode_authorize for deletion test */
4584 if (wantdelete)
4585 niopts |= WANTPARENT;
4586
4587 /* do the lookup */
4588 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
4589 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
4590 &context);
4591 error = namei(&nd);
4592 if (!error) {
4593 vp = nd.ni_vp;
4594 if (wantdelete)
4595 dvp = nd.ni_dvp;
4596 }
4597 nameidone(&nd);
4598 }
4599
4600 /*
4601 * Handle lookup errors.
4602 */
4603 switch(error) {
4604 case ENOENT:
4605 case EACCES:
4606 case EPERM:
4607 case ENOTDIR:
4608 result[i] = error;
4609 break;
4610 case 0:
4611 /* run this access check */
4612 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
4613 break;
4614 default:
4615 /* fatal lookup error */
4616
4617 goto out;
4618 }
4619 }
4620
4621 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
4622
4623 /* copy out results */
4624 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
4625
4626 out:
4627 if (input && input != stack_input)
4628 FREE(input, M_TEMP);
4629 if (result)
4630 FREE(result, M_TEMP);
4631 if (vp)
4632 vnode_put(vp);
4633 if (dvp)
4634 vnode_put(dvp);
4635 if (IS_VALID_CRED(context.vc_ucred))
4636 kauth_cred_unref(&context.vc_ucred);
4637 return(error);
4638 }
4639
4640
4641 /*
4642 * Returns: 0 Success
4643 * namei:EFAULT Bad address
4644 * namei:ENAMETOOLONG Filename too long
4645 * namei:ENOENT No such file or directory
4646 * namei:ELOOP Too many levels of symbolic links
4647 * namei:EBADF Bad file descriptor
4648 * namei:ENOTDIR Not a directory
4649 * namei:???
4650 * access1:
4651 */
4652 int
4653 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
4654 {
4655 int error;
4656 struct nameidata nd;
4657 int niopts;
4658 struct vfs_context context;
4659 #if NAMEDRSRCFORK
4660 int is_namedstream = 0;
4661 #endif
4662
4663 /*
4664 * Access is defined as checking against the process'
4665 * real identity, even if operations are checking the
4666 * effective identity. So we need to tweak the credential
4667 * in the context.
4668 */
4669 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4670 context.vc_thread = current_thread();
4671
4672 niopts = FOLLOW | AUDITVNPATH1;
4673 /* need parent for vnode_authorize for deletion test */
4674 if (uap->flags & _DELETE_OK)
4675 niopts |= WANTPARENT;
4676 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_USERSPACE,
4677 uap->path, &context);
4678
4679 #if NAMEDRSRCFORK
4680 /* access(F_OK) calls are allowed for resource forks. */
4681 if (uap->flags == F_OK)
4682 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4683 #endif
4684 error = namei(&nd);
4685 if (error)
4686 goto out;
4687
4688 #if NAMEDRSRCFORK
4689 /* Grab reference on the shadow stream file vnode to
4690 * force an inactive on release which will mark it
4691 * for recycle.
4692 */
4693 if (vnode_isnamedstream(nd.ni_vp) &&
4694 (nd.ni_vp->v_parent != NULLVP) &&
4695 vnode_isshadow(nd.ni_vp)) {
4696 is_namedstream = 1;
4697 vnode_ref(nd.ni_vp);
4698 }
4699 #endif
4700
4701 error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
4702
4703 #if NAMEDRSRCFORK
4704 if (is_namedstream) {
4705 vnode_rele(nd.ni_vp);
4706 }
4707 #endif
4708
4709 vnode_put(nd.ni_vp);
4710 if (uap->flags & _DELETE_OK)
4711 vnode_put(nd.ni_dvp);
4712 nameidone(&nd);
4713
4714 out:
4715 kauth_cred_unref(&context.vc_ucred);
4716 return(error);
4717 }
4718
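/*
 * Editorial sketch (not part of the original source): a plain userspace
 * access(2) check; R_OK/W_OK/X_OK are the "regular access bits" that
 * access1() converts to kauth actions.  The path is hypothetical.
 */
#if 0
#include <unistd.h>

static int
can_read_and_write(const char *path)
{
	/* 0 means both checks passed against the real uid/gid */
	return access(path, R_OK | W_OK) == 0;
}
#endif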
4719
4720 /*
4721 * Returns: 0 Success
4722 * EFAULT
4723 * copyout:EFAULT
4724 * namei:???
4725 * vn_stat:???
4726 */
4727 static int
4728 stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4729 {
4730 union {
4731 struct stat sb;
4732 struct stat64 sb64;
4733 } source;
4734 union {
4735 struct user64_stat user64_sb;
4736 struct user32_stat user32_sb;
4737 struct user64_stat64 user64_sb64;
4738 struct user32_stat64 user32_sb64;
4739 } dest;
4740 caddr_t sbp;
4741 int error, my_size;
4742 kauth_filesec_t fsec;
4743 size_t xsecurity_bufsize;
4744 void * statptr;
4745
4746 #if NAMEDRSRCFORK
4747 int is_namedstream = 0;
4748 /* stat calls are allowed for resource forks. */
4749 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4750 #endif
4751 error = namei(ndp);
4752 if (error)
4753 return (error);
4754 fsec = KAUTH_FILESEC_NONE;
4755
4756 statptr = (void *)&source;
4757
4758 #if NAMEDRSRCFORK
4759 /* Grab reference on the shadow stream file vnode to
4760 * force an inactive on release which will mark it
4761 * for recycle.
4762 */
4763 if (vnode_isnamedstream(ndp->ni_vp) &&
4764 (ndp->ni_vp->v_parent != NULLVP) &&
4765 vnode_isshadow(ndp->ni_vp)) {
4766 is_namedstream = 1;
4767 vnode_ref(ndp->ni_vp);
4768 }
4769 #endif
4770
4771 error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
4772
4773 #if NAMEDRSRCFORK
4774 if (is_namedstream) {
4775 vnode_rele(ndp->ni_vp);
4776 }
4777 #endif
4778 vnode_put(ndp->ni_vp);
4779 nameidone(ndp);
4780
4781 if (error)
4782 return (error);
4783 /* Zap spare fields */
4784 if (isstat64 != 0) {
4785 source.sb64.st_lspare = 0;
4786 source.sb64.st_qspare[0] = 0LL;
4787 source.sb64.st_qspare[1] = 0LL;
4788 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4789 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
4790 my_size = sizeof(dest.user64_sb64);
4791 sbp = (caddr_t)&dest.user64_sb64;
4792 } else {
4793 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
4794 my_size = sizeof(dest.user32_sb64);
4795 sbp = (caddr_t)&dest.user32_sb64;
4796 }
4797 /*
4798 * Check if we raced (post lookup) against the last unlink of a file.
4799 */
4800 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
4801 source.sb64.st_nlink = 1;
4802 }
4803 } else {
4804 source.sb.st_lspare = 0;
4805 source.sb.st_qspare[0] = 0LL;
4806 source.sb.st_qspare[1] = 0LL;
4807 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4808 munge_user64_stat(&source.sb, &dest.user64_sb);
4809 my_size = sizeof(dest.user64_sb);
4810 sbp = (caddr_t)&dest.user64_sb;
4811 } else {
4812 munge_user32_stat(&source.sb, &dest.user32_sb);
4813 my_size = sizeof(dest.user32_sb);
4814 sbp = (caddr_t)&dest.user32_sb;
4815 }
4816
4817 /*
4818 * Check if we raced (post lookup) against the last unlink of a file.
4819 */
4820 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
4821 source.sb.st_nlink = 1;
4822 }
4823 }
4824 if ((error = copyout(sbp, ub, my_size)) != 0)
4825 goto out;
4826
4827 /* caller wants extended security information? */
4828 if (xsecurity != USER_ADDR_NULL) {
4829
4830 /* did we get any? */
4831 if (fsec == KAUTH_FILESEC_NONE) {
4832 if (susize(xsecurity_size, 0) != 0) {
4833 error = EFAULT;
4834 goto out;
4835 }
4836 } else {
4837 /* find the user buffer size */
4838 xsecurity_bufsize = fusize(xsecurity_size);
4839
4840 /* copy out the actual data size */
4841 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
4842 error = EFAULT;
4843 goto out;
4844 }
4845
4846 /* if the caller supplied enough room, copy out to it */
4847 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
4848 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
4849 }
4850 }
4851 out:
4852 if (fsec != KAUTH_FILESEC_NONE)
4853 kauth_filesec_free(fsec);
4854 return (error);
4855 }
4856
4857 /*
4858 * Get file status; this version follows links.
4859 *
4860 * Returns: 0 Success
4861 * stat2:??? [see stat2() in this file]
4862 */
4863 static int
4864 stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4865 {
4866 struct nameidata nd;
4867 vfs_context_t ctx = vfs_context_current();
4868
4869 NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | FOLLOW | AUDITVNPATH1,
4870 UIO_USERSPACE, path, ctx);
4871 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4872 }
4873
4874 /*
4875 * stat_extended: Get file status; with extended security (ACL).
4876 *
4877 * Parameters: p (ignored)
4878 * uap User argument descriptor (see below)
4879 * retval (ignored)
4880 *
4881 * Indirect: uap->path Path of file to get status from
4882 * uap->ub User buffer (holds file status info)
4883 * uap->xsecurity ACL to get (extended security)
4884 * uap->xsecurity_size Size of ACL
4885 *
4886 * Returns: 0 Success
4887 * !0 errno value
4888 *
4889 */
4890 int
4891 stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval)
4892 {
4893 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4894 }
4895
4896 /*
4897 * Returns: 0 Success
4898 * stat1:??? [see stat1() in this file]
4899 */
4900 int
4901 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
4902 {
4903 return(stat1(uap->path, uap->ub, 0, 0, 0));
4904 }
4905
4906 int
4907 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
4908 {
4909 return(stat1(uap->path, uap->ub, 0, 0, 1));
4910 }
4911
4912 /*
4913 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
4914 *
4915 * Parameters: p (ignored)
4916 * uap User argument descriptor (see below)
4917 * retval (ignored)
4918 *
4919 * Indirect: uap->path Path of file to get status from
4920 * uap->ub User buffer (holds file status info)
4921 * uap->xsecurity ACL to get (extended security)
4922 * uap->xsecurity_size Size of ACL
4923 *
4924 * Returns: 0 Success
4925 * !0 errno value
4926 *
4927 */
4928 int
4929 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
4930 {
4931 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4932 }
4933 /*
4934 * Get file status; this version does not follow links.
4935 */
4936 static int
4937 lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4938 {
4939 struct nameidata nd;
4940 vfs_context_t ctx = vfs_context_current();
4941
4942 NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
4943 UIO_USERSPACE, path, ctx);
4944
4945 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4946 }
4947
4948 /*
4949 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
4950 *
4951 * Parameters: p (ignored)
4952 * uap User argument descriptor (see below)
4953 * retval (ignored)
4954 *
4955 * Indirect: uap->path Path of file to get status from
4956 * uap->ub User buffer (holds file status info)
4957 * uap->xsecurity ACL to get (extended security)
4958 * uap->xsecurity_size Size of ACL
4959 *
4960 * Returns: 0 Success
4961 * !0 errno value
4962 *
4963 */
4964 int
4965 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
4966 {
4967 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4968 }
4969
4970 int
4971 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
4972 {
4973 return(lstat1(uap->path, uap->ub, 0, 0, 0));
4974 }
4975
4976 int
4977 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
4978 {
4979 return(lstat1(uap->path, uap->ub, 0, 0, 1));
4980 }
4981
4982 /*
4983 * lstat64_extended: Get file status; can handle large inode numbers; does not
4984 * follow links; with extended security (ACL).
4985 *
4986 * Parameters: p (ignored)
4987 * uap User argument descriptor (see below)
4988 * retval (ignored)
4989 *
4990 * Indirect: uap->path Path of file to get status from
4991 * uap->ub User buffer (holds file status info)
4992 * uap->xsecurity ACL to get (extended security)
4993 * uap->xsecurity_size Size of ACL
4994 *
4995 * Returns: 0 Success
4996 * !0 errno value
4997 *
4998 */
4999 int
5000 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5001 {
5002 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
5003 }
5004
5005 /*
5006 * Get configurable pathname variables.
5007 *
5008 * Returns: 0 Success
5009 * namei:???
5010 * vn_pathconf:???
5011 *
5012 * Notes: Global implementation constants are intended to be
5013 * implemented in this function directly; all other constants
5014 * are per-FS implementation, and therefore must be handled in
5015 * each respective FS, instead.
5016 *
5017 * XXX We implement some things globally right now that should actually be
5018 * XXX per-FS; we will need to deal with this at some point.
5019 */
5020 /* ARGSUSED */
5021 int
5022 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5023 {
5024 int error;
5025 struct nameidata nd;
5026 vfs_context_t ctx = vfs_context_current();
5027
5028 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5029 UIO_USERSPACE, uap->path, ctx);
5030 error = namei(&nd);
5031 if (error)
5032 return (error);
5033
5034 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5035
5036 vnode_put(nd.ni_vp);
5037 nameidone(&nd);
5038 return (error);
5039 }
5040
5041 /*
5042 * Return target name of a symbolic link.
5043 */
5044 /* ARGSUSED */
5045 int
5046 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5047 {
5048 vnode_t vp;
5049 uio_t auio;
5050 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5051 int error;
5052 struct nameidata nd;
5053 vfs_context_t ctx = vfs_context_current();
5054 char uio_buf[ UIO_SIZEOF(1) ];
5055
5056 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5057 UIO_USERSPACE, uap->path, ctx);
5058 error = namei(&nd);
5059 if (error)
5060 return (error);
5061 vp = nd.ni_vp;
5062
5063 nameidone(&nd);
5064
5065 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
5066 &uio_buf[0], sizeof(uio_buf));
5067 uio_addiov(auio, uap->buf, uap->count);
5068 if (vp->v_type != VLNK)
5069 error = EINVAL;
5070 else {
5071 #if CONFIG_MACF
5072 error = mac_vnode_check_readlink(ctx,
5073 vp);
5074 #endif
5075 if (error == 0)
5076 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
5077 if (error == 0)
5078 error = VNOP_READLINK(vp, auio, ctx);
5079 }
5080 vnode_put(vp);
5081
5082 /* Safe: uio_resid() is bounded above by "count", and "count" is an int */
5083 *retval = uap->count - (int)uio_resid(auio);
5084 return (error);
5085 }
5086
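/*
 * Editorial sketch (not part of the original source): readlink(2) does
 * not NUL-terminate the buffer, so userspace must terminate it using
 * the returned length.  The path is hypothetical.
 */
#if 0
#include <stdio.h>
#include <unistd.h>

static void
print_link_target(const char *path)
{
	char buf[1024];
	ssize_t len = readlink(path, buf, sizeof(buf) - 1);

	if (len >= 0) {
		buf[len] = '\0';
		printf("%s -> %s\n", path, buf);
	}
}
#endif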
5087 /*
5088 * Change file flags.
5089 */
5090 static int
5091 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5092 {
5093 struct vnode_attr va;
5094 kauth_action_t action;
5095 int error;
5096
5097 VATTR_INIT(&va);
5098 VATTR_SET(&va, va_flags, flags);
5099
5100 #if CONFIG_MACF
5101 error = mac_vnode_check_setflags(ctx, vp, flags);
5102 if (error)
5103 goto out;
5104 #endif
5105
5106 /* request authorisation, disregard immutability */
5107 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5108 goto out;
5109 /*
5110 * Request that the auth layer disregard those file flags it's allowed to when
5111 * authorizing this operation; we need to do this in order to be able to
5112 * clear immutable flags.
5113 */
5114 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5115 goto out;
5116 error = vnode_setattr(vp, &va, ctx);
5117
5118 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5119 error = ENOTSUP;
5120 }
5121 out:
5122 vnode_put(vp);
5123 return(error);
5124 }
5125
5126 /*
5127 * Change flags of a file given a path name.
5128 */
5129 /* ARGSUSED */
5130 int
5131 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5132 {
5133 vnode_t vp;
5134 vfs_context_t ctx = vfs_context_current();
5135 int error;
5136 struct nameidata nd;
5137
5138 AUDIT_ARG(fflags, uap->flags);
5139 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5140 UIO_USERSPACE, uap->path, ctx);
5141 error = namei(&nd);
5142 if (error)
5143 return (error);
5144 vp = nd.ni_vp;
5145 nameidone(&nd);
5146
5147 error = chflags1(vp, uap->flags, ctx);
5148
5149 return(error);
5150 }
5151
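/*
 * Editorial sketch (not part of the original source): setting BSD file
 * flags from userspace, assuming the BSD chflags(2) interface.  The
 * path and helper name are hypothetical.
 */
#if 0
#include <sys/stat.h>
#include <unistd.h>

static int
make_immutable(const char *path)
{
	struct stat st;

	if (stat(path, &st) != 0)
		return -1;
	/* preserve any existing flags and add user-immutable */
	return chflags(path, st.st_flags | UF_IMMUTABLE);
}
#endif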
5152 /*
5153 * Change flags of a file given a file descriptor.
5154 */
5155 /* ARGSUSED */
5156 int
5157 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5158 {
5159 vnode_t vp;
5160 int error;
5161
5162 AUDIT_ARG(fd, uap->fd);
5163 AUDIT_ARG(fflags, uap->flags);
5164 if ( (error = file_vnode(uap->fd, &vp)) )
5165 return (error);
5166
5167 if ((error = vnode_getwithref(vp))) {
5168 file_drop(uap->fd);
5169 return(error);
5170 }
5171
5172 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5173
5174 error = chflags1(vp, uap->flags, vfs_context_current());
5175
5176 file_drop(uap->fd);
5177 return (error);
5178 }
5179
5180 /*
5181 * Change security information on a filesystem object.
5182 *
5183 * Returns: 0 Success
5184 * EPERM Operation not permitted
5185 * vnode_authattr:??? [anything vnode_authattr can return]
5186 * vnode_authorize:??? [anything vnode_authorize can return]
5187 * vnode_setattr:??? [anything vnode_setattr can return]
5188 *
5189 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5190 * translated to EPERM before being returned.
5191 */
5192 static int
5193 chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5194 {
5195 kauth_action_t action;
5196 int error;
5197
5198 AUDIT_ARG(mode, vap->va_mode);
5199 /* XXX audit new args */
5200
5201 #if NAMEDSTREAMS
5202 /* chmod calls are not allowed for resource forks. */
5203 if (vp->v_flag & VISNAMEDSTREAM) {
5204 return (EPERM);
5205 }
5206 #endif
5207
5208 #if CONFIG_MACF
5209 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5210 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
5211 return (error);
5212 #endif
5213
5214 /* make sure that the caller is allowed to set this security information */
5215 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5216 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5217 if (error == EACCES)
5218 error = EPERM;
5219 return(error);
5220 }
5221
5222 error = vnode_setattr(vp, vap, ctx);
5223
5224 return (error);
5225 }
5226
5227
5228 /*
5229 * Change mode of a file given a path name.
5230 *
5231 * Returns: 0 Success
5232 * namei:??? [anything namei can return]
5233 * chmod2:??? [anything chmod2 can return]
5234 */
5235 static int
5236 chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
5237 {
5238 struct nameidata nd;
5239 int error;
5240
5241 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5242 UIO_USERSPACE, path, ctx);
5243 if ((error = namei(&nd)))
5244 return (error);
5245 error = chmod2(ctx, nd.ni_vp, vap);
5246 vnode_put(nd.ni_vp);
5247 nameidone(&nd);
5248 return(error);
5249 }
5250
5251 /*
5252 * chmod_extended: Change the mode of a file given a path name; with extended
5253 * argument list (including extended security (ACL)).
5254 *
5255 * Parameters: p Process requesting the open
5256 * uap User argument descriptor (see below)
5257 * retval (ignored)
5258 *
5259 * Indirect: uap->path Path to object (same as 'chmod')
5260 * uap->uid UID to set
5261 * uap->gid GID to set
5262 * uap->mode File mode to set (same as 'chmod')
5263 * uap->xsecurity ACL to set (or delete)
5264 *
5265 * Returns: 0 Success
5266 * !0 errno value
5267 *
5268 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5269 *
5270 * XXX: We should enumerate the possible errno values here, and where
5271 * in the code they originated.
5272 */
5273 int
5274 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
5275 {
5276 int error;
5277 struct vnode_attr va;
5278 kauth_filesec_t xsecdst;
5279
5280 AUDIT_ARG(owner, uap->uid, uap->gid);
5281
5282 VATTR_INIT(&va);
5283 if (uap->mode != -1)
5284 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5285 if (uap->uid != KAUTH_UID_NONE)
5286 VATTR_SET(&va, va_uid, uap->uid);
5287 if (uap->gid != KAUTH_GID_NONE)
5288 VATTR_SET(&va, va_gid, uap->gid);
5289
5290 xsecdst = NULL;
5291 switch(uap->xsecurity) {
5292 /* explicit remove request */
5293 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5294 VATTR_SET(&va, va_acl, NULL);
5295 break;
5296 /* not being set */
5297 case USER_ADDR_NULL:
5298 break;
5299 default:
5300 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5301 return(error);
5302 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5303 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
5304 }
5305
5306 error = chmod1(vfs_context_current(), uap->path, &va);
5307
5308 if (xsecdst != NULL)
5309 kauth_filesec_free(xsecdst);
5310 return(error);
5311 }
5312
5313 /*
5314 * Returns: 0 Success
5315 * chmod1:??? [anything chmod1 can return]
5316 */
5317 int
5318 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5319 {
5320 struct vnode_attr va;
5321
5322 VATTR_INIT(&va);
5323 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5324
5325 return(chmod1(vfs_context_current(), uap->path, &va));
5326 }
5327
5328 /*
5329 * Change mode of a file given a file descriptor.
5330 */
5331 static int
5332 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
5333 {
5334 vnode_t vp;
5335 int error;
5336
5337 AUDIT_ARG(fd, fd);
5338
5339 if ((error = file_vnode(fd, &vp)) != 0)
5340 return (error);
5341 if ((error = vnode_getwithref(vp)) != 0) {
5342 file_drop(fd);
5343 return(error);
5344 }
5345 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5346
5347 error = chmod2(vfs_context_current(), vp, vap);
5348 (void)vnode_put(vp);
5349 file_drop(fd);
5350
5351 return (error);
5352 }
5353
5354 /*
5355 * fchmod_extended: Change mode of a file given a file descriptor; with
5356 * extended argument list (including extended security (ACL)).
5357 *
5358 * Parameters: p Process requesting to change file mode
5359 * uap User argument descriptor (see below)
5360 * retval (ignored)
5361 *
5362 * Indirect: uap->mode File mode to set (same as 'chmod')
5363 * uap->uid UID to set
5364 * uap->gid GID to set
5365 * uap->xsecurity ACL to set (or delete)
5366 * uap->fd File descriptor of file to change mode
5367 *
5368 * Returns: 0 Success
5369 * !0 errno value
5370 *
5371 */
5372 int
5373 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
5374 {
5375 int error;
5376 struct vnode_attr va;
5377 kauth_filesec_t xsecdst;
5378
5379 AUDIT_ARG(owner, uap->uid, uap->gid);
5380
5381 VATTR_INIT(&va);
5382 if (uap->mode != -1)
5383 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5384 if (uap->uid != KAUTH_UID_NONE)
5385 VATTR_SET(&va, va_uid, uap->uid);
5386 if (uap->gid != KAUTH_GID_NONE)
5387 VATTR_SET(&va, va_gid, uap->gid);
5388
5389 xsecdst = NULL;
5390 switch(uap->xsecurity) {
5391 case USER_ADDR_NULL:
5392 VATTR_SET(&va, va_acl, NULL);
5393 break;
5394 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5395 VATTR_SET(&va, va_acl, NULL);
5396 break;
5397 /* not being set */
5398 case CAST_USER_ADDR_T(-1):
5399 break;
5400 default:
5401 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5402 return(error);
5403 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5404 }
5405
5406 error = fchmod1(p, uap->fd, &va);
5407
5408
5409 switch(uap->xsecurity) {
5410 case USER_ADDR_NULL:
5411 case CAST_USER_ADDR_T(-1):
5412 break;
5413 default:
5414 if (xsecdst != NULL)
5415 kauth_filesec_free(xsecdst);
5416 }
5417 return(error);
5418 }
5419
5420 int
5421 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
5422 {
5423 struct vnode_attr va;
5424
5425 VATTR_INIT(&va);
5426 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5427
5428 return(fchmod1(p, uap->fd, &va));
5429 }
5430
5431
5432 /*
5433 * Set ownership given a path name.
5434 */
5435 /* ARGSUSED */
5436 static int
5437 chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow)
5438 {
5439 vnode_t vp;
5440 struct vnode_attr va;
5441 int error;
5442 struct nameidata nd;
5443 kauth_action_t action;
5444
5445 AUDIT_ARG(owner, uap->uid, uap->gid);
5446
5447 NDINIT(&nd, LOOKUP, OP_SETATTR,
5448 (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
5449 UIO_USERSPACE, uap->path, ctx);
5450 error = namei(&nd);
5451 if (error)
5452 return (error);
5453 vp = nd.ni_vp;
5454
5455 nameidone(&nd);
5456
5457 VATTR_INIT(&va);
5458 if (uap->uid != VNOVAL)
5459 VATTR_SET(&va, va_uid, uap->uid);
5460 if (uap->gid != VNOVAL)
5461 VATTR_SET(&va, va_gid, uap->gid);
5462
5463 #if CONFIG_MACF
5464 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5465 if (error)
5466 goto out;
5467 #endif
5468
5469 /* preflight and authorize attribute changes */
5470 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5471 goto out;
5472 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5473 goto out;
5474 error = vnode_setattr(vp, &va, ctx);
5475
5476 out:
5477 /*
5478 * EACCES is only allowed from namei(); permissions failure should
5479 * return EPERM, so we need to translate the error code.
5480 */
5481 if (error == EACCES)
5482 error = EPERM;
5483
5484 vnode_put(vp);
5485 return (error);
5486 }
5487
5488 int
5489 chown(__unused proc_t p, struct chown_args *uap, int32_t *retval)
5490 {
5491 return chown1(vfs_context_current(), uap, retval, 1);
5492 }
5493
5494 int
5495 lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval)
5496 {
5497 /* Argument list identical, but machine generated; cast for chown1() */
5498 return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
5499 }
5500
5501 /*
5502 * Set ownership given a file descriptor.
5503 */
5504 /* ARGSUSED */
5505 int
5506 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
5507 {
5508 struct vnode_attr va;
5509 vfs_context_t ctx = vfs_context_current();
5510 vnode_t vp;
5511 int error;
5512 kauth_action_t action;
5513
5514 AUDIT_ARG(owner, uap->uid, uap->gid);
5515 AUDIT_ARG(fd, uap->fd);
5516
5517 if ( (error = file_vnode(uap->fd, &vp)) )
5518 return (error);
5519
5520 if ( (error = vnode_getwithref(vp)) ) {
5521 file_drop(uap->fd);
5522 return(error);
5523 }
5524 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5525
5526 VATTR_INIT(&va);
5527 if (uap->uid != VNOVAL)
5528 VATTR_SET(&va, va_uid, uap->uid);
5529 if (uap->gid != VNOVAL)
5530 VATTR_SET(&va, va_gid, uap->gid);
5531
5532 #if NAMEDSTREAMS
5533 /* chown calls are not allowed for resource forks. */
5534 if (vp->v_flag & VISNAMEDSTREAM) {
5535 error = EPERM;
5536 goto out;
5537 }
5538 #endif
5539
5540 #if CONFIG_MACF
5541 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5542 if (error)
5543 goto out;
5544 #endif
5545
5546 /* preflight and authorize attribute changes */
5547 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5548 goto out;
5549 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5550 if (error == EACCES)
5551 error = EPERM;
5552 goto out;
5553 }
5554 error = vnode_setattr(vp, &va, ctx);
5555
5556 out:
5557 (void)vnode_put(vp);
5558 file_drop(uap->fd);
5559 return (error);
5560 }
5561
5562 static int
5563 getutimes(user_addr_t usrtvp, struct timespec *tsp)
5564 {
5565 int error;
5566
5567 if (usrtvp == USER_ADDR_NULL) {
5568 struct timeval old_tv;
5569 /* XXX Y2038 bug because of microtime argument */
5570 microtime(&old_tv);
5571 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
5572 tsp[1] = tsp[0];
5573 } else {
5574 if (IS_64BIT_PROCESS(current_proc())) {
5575 struct user64_timeval tv[2];
5576 error = copyin(usrtvp, (void *)tv, sizeof(tv));
5577 if (error)
5578 return (error);
5579 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5580 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5581 } else {
5582 struct user32_timeval tv[2];
5583 error = copyin(usrtvp, (void *)tv, sizeof(tv));
5584 if (error)
5585 return (error);
5586 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5587 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5588 }
5589 }
5590 return 0;
5591 }
5592
5593 static int
5594 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
5595 int nullflag)
5596 {
5597 int error;
5598 struct vnode_attr va;
5599 kauth_action_t action;
5600
5601 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5602
5603 VATTR_INIT(&va);
5604 VATTR_SET(&va, va_access_time, ts[0]);
5605 VATTR_SET(&va, va_modify_time, ts[1]);
5606 if (nullflag)
5607 va.va_vaflags |= VA_UTIMES_NULL;
5608
5609 #if NAMEDSTREAMS
5610 /* utimes calls are not allowed for resource forks. */
5611 if (vp->v_flag & VISNAMEDSTREAM) {
5612 error = EPERM;
5613 goto out;
5614 }
5615 #endif
5616
5617 #if CONFIG_MACF
5618 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
5619 if (error)
5620 goto out;
5621 #endif
5622 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
5623 if (!nullflag && error == EACCES)
5624 error = EPERM;
5625 goto out;
5626 }
5627
5628 /* since we may not need to auth anything, check here */
5629 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5630 if (!nullflag && error == EACCES)
5631 error = EPERM;
5632 goto out;
5633 }
5634 error = vnode_setattr(vp, &va, ctx);
5635
5636 out:
5637 return error;
5638 }
5639
5640 /*
5641 * Set the access and modification times of a file.
5642 */
5643 /* ARGSUSED */
5644 int
5645 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
5646 {
5647 struct timespec ts[2];
5648 user_addr_t usrtvp;
5649 int error;
5650 struct nameidata nd;
5651 vfs_context_t ctx = vfs_context_current();
5652
5653 /*
5654 * AUDIT: Needed to change the order of operations to do the
5655 * name lookup first because auditing wants the path.
5656 */
5657 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5658 UIO_USERSPACE, uap->path, ctx);
5659 error = namei(&nd);
5660 if (error)
5661 return (error);
5662 nameidone(&nd);
5663
5664 /*
5665 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
5666 * the current time instead.
5667 */
5668 usrtvp = uap->tptr;
5669 if ((error = getutimes(usrtvp, ts)) != 0)
5670 goto out;
5671
5672 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
5673
5674 out:
5675 vnode_put(nd.ni_vp);
5676 return (error);
5677 }
5678
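/*
 * Editorial sketch (not part of the original source): setting explicit
 * access/modification times from userspace; passing NULL instead of
 * the array sets both to the current time, matching the USER_ADDR_NULL
 * case in getutimes() above.  The path and helper are hypothetical.
 */
#if 0
#include <sys/time.h>

static int
set_times_to_epoch_plus_one(const char *path)
{
	struct timeval tv[2];

	tv[0].tv_sec = 1;	/* access time */
	tv[0].tv_usec = 0;
	tv[1].tv_sec = 1;	/* modification time */
	tv[1].tv_usec = 0;
	return utimes(path, tv);
}
#endif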
5679 /*
5680 * Set the access and modification times of a file.
5681 */
5682 /* ARGSUSED */
5683 int
5684 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
5685 {
5686 struct timespec ts[2];
5687 vnode_t vp;
5688 user_addr_t usrtvp;
5689 int error;
5690
5691 AUDIT_ARG(fd, uap->fd);
5692 usrtvp = uap->tptr;
5693 if ((error = getutimes(usrtvp, ts)) != 0)
5694 return (error);
5695 if ((error = file_vnode(uap->fd, &vp)) != 0)
5696 return (error);
5697 if((error = vnode_getwithref(vp))) {
5698 file_drop(uap->fd);
5699 return(error);
5700 }
5701
5702 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
5703 vnode_put(vp);
5704 file_drop(uap->fd);
5705 return(error);
5706 }
5707
5708 /*
5709 * Truncate a file given its path name.
5710 */
5711 /* ARGSUSED */
5712 int
5713 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
5714 {
5715 vnode_t vp;
5716 struct vnode_attr va;
5717 vfs_context_t ctx = vfs_context_current();
5718 int error;
5719 struct nameidata nd;
5720 kauth_action_t action;
5721
5722 if (uap->length < 0)
5723 return(EINVAL);
5724 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
5725 UIO_USERSPACE, uap->path, ctx);
5726 if ((error = namei(&nd)))
5727 return (error);
5728 vp = nd.ni_vp;
5729
5730 nameidone(&nd);
5731
5732 VATTR_INIT(&va);
5733 VATTR_SET(&va, va_data_size, uap->length);
5734
5735 #if CONFIG_MACF
5736 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
5737 if (error)
5738 goto out;
5739 #endif
5740
5741 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5742 goto out;
5743 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5744 goto out;
5745 error = vnode_setattr(vp, &va, ctx);
5746 out:
5747 vnode_put(vp);
5748 return (error);
5749 }
5750
5751 /*
5752 * Truncate a file given a file descriptor.
5753 */
5754 /* ARGSUSED */
5755 int
5756 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
5757 {
5758 vfs_context_t ctx = vfs_context_current();
5759 struct vnode_attr va;
5760 vnode_t vp;
5761 struct fileproc *fp;
5762 int error ;
5763 int fd = uap->fd;
5764
5765 AUDIT_ARG(fd, uap->fd);
5766 if (uap->length < 0)
5767 return(EINVAL);
5768
5769 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
5770 return(error);
5771 }
5772
5773 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
5774 case DTYPE_PSXSHM:
5775 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
5776 goto out;
5777 case DTYPE_VNODE:
5778 break;
5779 default:
5780 error = EINVAL;
5781 goto out;
5782 }
5783
5784 vp = (vnode_t)fp->f_fglob->fg_data;
5785
5786 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
5787 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5788 error = EINVAL;
5789 goto out;
5790 }
5791
5792 if ((error = vnode_getwithref(vp)) != 0) {
5793 goto out;
5794 }
5795
5796 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5797
5798 #if CONFIG_MACF
5799 error = mac_vnode_check_truncate(ctx,
5800 fp->f_fglob->fg_cred, vp);
5801 if (error) {
5802 (void)vnode_put(vp);
5803 goto out;
5804 }
5805 #endif
5806 VATTR_INIT(&va);
5807 VATTR_SET(&va, va_data_size, uap->length);
5808 error = vnode_setattr(vp, &va, ctx);
5809 (void)vnode_put(vp);
5810 out:
5811 file_drop(fd);
5812 return (error);
5813 }
5814
5815
5816 /*
5817 * Sync an open file with synchronized I/O _file_ integrity completion
5818 */
5819 /* ARGSUSED */
5820 int
5821 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
5822 {
5823 __pthread_testcancel(1);
5824 return(fsync_common(p, uap, MNT_WAIT));
5825 }
5826
5827
5828 /*
5829 * Sync an open file with synchronized I/O _file_ integrity completion
5830 *
5831 * Notes: This is a legacy support function that does not test for
5832 * thread cancellation points.
5833 */
5834 /* ARGSUSED */
5835 int
5836 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
5837 {
5838 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
5839 }
5840
5841
5842 /*
5843 * Sync an open file with synchronized I/O _data_ integrity completion
5844 */
5845 /* ARGSUSED */
5846 int
5847 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
5848 {
5849 __pthread_testcancel(1);
5850 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
5851 }
5852
5853
5854 /*
5855 * fsync_common
5856 *
5857 * Common fsync code to support both synchronized I/O file integrity completion
5858 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
5859 *
5860 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
5861 * will only guarantee that the file data contents are retrievable. If
5862 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
5863 * requires that additional metadata unnecessary for retrieving the file data
5864 * contents, such as atime, mtime, ctime, etc., be committed to stable
5865 * storage.
5866 *
5867 * Parameters: p The process
5868 * uap->fd The descriptor to synchronize
5869 * flags The data integrity flags
5870 *
5871 * Returns: int Success
5872 * fp_getfvp:EBADF Bad file descriptor
5873 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5874 * VNOP_FSYNC:??? unspecified
5875 *
5876 * Notes: We use struct fsync_args because it is a short name, and all
5877 * caller argument structures are otherwise identical.
5878 */
5879 static int
5880 fsync_common(proc_t p, struct fsync_args *uap, int flags)
5881 {
5882 vnode_t vp;
5883 struct fileproc *fp;
5884 vfs_context_t ctx = vfs_context_current();
5885 int error;
5886
5887 AUDIT_ARG(fd, uap->fd);
5888
5889 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
5890 return (error);
5891 if ( (error = vnode_getwithref(vp)) ) {
5892 file_drop(uap->fd);
5893 return(error);
5894 }
5895
5896 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5897
5898 error = VNOP_FSYNC(vp, flags, ctx);
5899
5900 #if NAMEDRSRCFORK
5901 /* Sync resource fork shadow file if necessary. */
5902 if ((error == 0) &&
5903 (vp->v_flag & VISNAMEDSTREAM) &&
5904 (vp->v_parent != NULLVP) &&
5905 vnode_isshadow(vp) &&
5906 (fp->f_flags & FP_WRITTEN)) {
5907 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
5908 }
5909 #endif
5910
5911 (void)vnode_put(vp);
5912 file_drop(uap->fd);
5913 return (error);
5914 }
5915
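/*
 * Editorial sketch (not part of the original source): from userspace,
 * fsync(2) maps to the MNT_WAIT (file integrity) path above and
 * fdatasync(2) to the MNT_DWAIT (data integrity) path; the helper name
 * is hypothetical.
 */
#if 0
#include <unistd.h>

static int
flush_file(int fd, int need_metadata)
{
	/* fdatasync() is sufficient when only the file data must be durable */
	return need_metadata ? fsync(fd) : fdatasync(fd);
}
#endif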
5916 /*
5917 * Duplicate files. Source must be a file, target must be a file or
5918 * must not exist.
5919 *
5920 * XXX Copyfile authorisation checking is woefully inadequate, and will not
5921 * perform inheritance correctly.
5922 */
5923 /* ARGSUSED */
5924 int
5925 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
5926 {
5927 vnode_t tvp, fvp, tdvp, sdvp;
5928 struct nameidata fromnd, tond;
5929 int error;
5930 vfs_context_t ctx = vfs_context_current();
5931
5932 /* Check that the flags are valid. */
5933
5934 if (uap->flags & ~CPF_MASK) {
5935 return(EINVAL);
5936 }
5937
5938 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
5939 UIO_USERSPACE, uap->from, ctx);
5940 if ((error = namei(&fromnd)))
5941 return (error);
5942 fvp = fromnd.ni_vp;
5943
5944 NDINIT(&tond, CREATE, OP_LINK,
5945 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
5946 UIO_USERSPACE, uap->to, ctx);
5947 if ((error = namei(&tond))) {
5948 goto out1;
5949 }
5950 tdvp = tond.ni_dvp;
5951 tvp = tond.ni_vp;
5952
5953 if (tvp != NULL) {
5954 if (!(uap->flags & CPF_OVERWRITE)) {
5955 error = EEXIST;
5956 goto out;
5957 }
5958 }
5959 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
5960 error = EISDIR;
5961 goto out;
5962 }
5963
5964 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5965 goto out;
5966
5967 if (fvp == tdvp)
5968 error = EINVAL;
5969 /*
5970 * If source is the same as the destination (that is the
5971 * same inode number) then there is nothing to do.
5972 * (fixed to have POSIX semantics - CSM 3/2/98)
5973 */
5974 if (fvp == tvp)
5975 error = -1;
5976 if (!error)
5977 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
5978 out:
5979 sdvp = tond.ni_startdir;
5980 /*
5981 * nameidone has to happen before we vnode_put(tdvp)
5982 * since it may need to release the fs_nodelock on the tdvp
5983 */
5984 nameidone(&tond);
5985
5986 if (tvp)
5987 vnode_put(tvp);
5988 vnode_put(tdvp);
5989 vnode_put(sdvp);
5990 out1:
5991 vnode_put(fvp);
5992
5993 if (fromnd.ni_startdir)
5994 vnode_put(fromnd.ni_startdir);
5995 nameidone(&fromnd);
5996
5997 if (error == -1)
5998 return (0);
5999 return (error);
6000 }
6001
6002
6003 /*
6004 * Rename files. Source and destination must either both be directories,
6005 * or both not be directories. If target is a directory, it must be empty.
6006 */
6007 /* ARGSUSED */
6008 int
6009 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
6010 {
6011 vnode_t tvp, tdvp;
6012 vnode_t fvp, fdvp;
6013 struct nameidata *fromnd, *tond;
6014 vfs_context_t ctx = vfs_context_current();
6015 int error;
6016 int do_retry;
6017 int mntrename;
6018 int need_event;
6019 const char *oname = NULL;
6020 char *from_name = NULL, *to_name = NULL;
6021 int from_len=0, to_len=0;
6022 int holding_mntlock;
6023 mount_t locked_mp = NULL;
6024 vnode_t oparent = NULLVP;
6025 #if CONFIG_FSE
6026 fse_info from_finfo, to_finfo;
6027 #endif
6028 int from_truncated=0, to_truncated;
6029 int batched = 0;
6030 struct vnode_attr *fvap, *tvap;
6031 int continuing = 0;
6032 /* carving out a chunk for structs that are too big to be on stack. */
6033 struct {
6034 struct nameidata from_node, to_node;
6035 struct vnode_attr fv_attr, tv_attr;
6036 } * __rename_data;
6037 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
6038 fromnd = &__rename_data->from_node;
6039 tond = &__rename_data->to_node;
6040
6041 holding_mntlock = 0;
6042 do_retry = 0;
6043 retry:
6044 fvp = tvp = NULL;
6045 fdvp = tdvp = NULL;
6046 fvap = tvap = NULL;
6047 mntrename = FALSE;
6048
6049 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
6050 UIO_USERSPACE, uap->from, ctx);
6051 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
6052
6053 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6054 UIO_USERSPACE, uap->to, ctx);
6055 tond->ni_flag = NAMEI_COMPOUNDRENAME;
6056
6057 continue_lookup:
6058 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6059 if ( (error = namei(fromnd)) )
6060 goto out1;
6061 fdvp = fromnd->ni_dvp;
6062 fvp = fromnd->ni_vp;
6063
6064 if (fvp && fvp->v_type == VDIR)
6065 tond->ni_cnd.cn_flags |= WILLBEDIR;
6066 }
6067
6068 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6069 if ( (error = namei(tond)) ) {
6070 /*
6071 * Translate error code for rename("dir1", "dir2/.").
6072 */
6073 if (error == EISDIR && fvp->v_type == VDIR)
6074 error = EINVAL;
6075 goto out1;
6076 }
6077 tdvp = tond->ni_dvp;
6078 tvp = tond->ni_vp;
6079 }
6080
6081 batched = vnode_compound_rename_available(fdvp);
6082 if (!fvp) {
6083 /*
6084 * Claim: this check will never reject a valid rename.
6085 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6086 * Suppose fdvp and tdvp are not on the same mount.
6087 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6088 * then you can't move it to within another dir on the same mountpoint.
6089 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6090 *
6091 * If this check passes, then we are safe to pass these vnodes to the same FS.
6092 */
6093 if (fdvp->v_mount != tdvp->v_mount) {
6094 error = EXDEV;
6095 goto out1;
6096 }
6097 goto skipped_lookup;
6098 }
6099
6100 if (!batched) {
6101 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6102 if (error) {
6103 if (error == ENOENT) {
6104 /*
6105 * We encountered a race where after doing the namei, tvp stops
6106 * being valid. If so, simply re-drive the rename call from the
6107 * top.
6108 */
6109 do_retry = 1;
6110 }
6111 goto out1;
6112 }
6113 }
6114
6115 /*
6116 * If the source and destination are the same (i.e. they're
6117 * links to the same vnode) and the target file system is
6118 * case sensitive, then there is nothing to do.
6119 *
6120 * XXX Come back to this.
6121 */
6122 if (fvp == tvp) {
6123 int pathconf_val;
6124
6125 /*
6126 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6127 * then assume that this file system is case sensitive.
6128 */
6129 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6130 pathconf_val != 0) {
6131 goto out1;
6132 }
6133 }
6134
6135 /*
6136 * Allow the renaming of mount points.
6137 * - target must not exist
6138 * - target must reside in the same directory as source
6139 * - union mounts cannot be renamed
6140 * - "/" cannot be renamed
6141 *
6142 * XXX Handle this in VFS after a continued lookup (if we missed
6143 * in the cache to start off)
6144 */
6145 if ((fvp->v_flag & VROOT) &&
6146 (fvp->v_type == VDIR) &&
6147 (tvp == NULL) &&
6148 (fvp->v_mountedhere == NULL) &&
6149 (fdvp == tdvp) &&
6150 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6151 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
6152 vnode_t coveredvp;
6153
6154 /* switch fvp to the covered vnode */
6155 coveredvp = fvp->v_mount->mnt_vnodecovered;
6156 if ( (vnode_getwithref(coveredvp)) ) {
6157 error = ENOENT;
6158 goto out1;
6159 }
6160 vnode_put(fvp);
6161
6162 fvp = coveredvp;
6163 mntrename = TRUE;
6164 }
6165 /*
6166 * Check for cross-device rename.
6167 */
6168 if ((fvp->v_mount != tdvp->v_mount) ||
6169 (tvp && (fvp->v_mount != tvp->v_mount))) {
6170 error = EXDEV;
6171 goto out1;
6172 }
6173
6174 /*
6175 * If source is the same as the destination (that is the
6176 * same inode number) then there is nothing to do...
6177 * EXCEPT if the underlying file system supports case
6178 * insensitivity and is case preserving. In this case
6179 * the file system needs to handle the special case of
6180 * getting the same vnode as target (fvp) and source (tvp).
6181 *
6182 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6183 * and _PC_CASE_PRESERVING can have this exception, and they need to
6184 * handle the special case of getting the same vnode as target and
6185 * source. NOTE: Then the target is unlocked going into vnop_rename,
6186 * so not to cause locking problems. There is a single reference on tvp.
6187 *
6188 * NOTE - that fvp == tvp also occurs if they are hard linked and
6189 * that correct behaviour then is just to return success without doing
6190 * anything.
6191 *
6192 * XXX filesystem should take care of this itself, perhaps...
6193 */
6194 if (fvp == tvp && fdvp == tdvp) {
6195 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6196 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6197 fromnd->ni_cnd.cn_namelen)) {
6198 goto out1;
6199 }
6200 }
6201
6202 if (holding_mntlock && fvp->v_mount != locked_mp) {
6203 /*
6204 * we're holding a reference and lock
6205 * on locked_mp, but it no longer matches
6206 * what we want to do... so drop our hold
6207 */
6208 mount_unlock_renames(locked_mp);
6209 mount_drop(locked_mp, 0);
6210 holding_mntlock = 0;
6211 }
6212 if (tdvp != fdvp && fvp->v_type == VDIR) {
6213 /*
6214 * serialize renames that re-shape
6215 * the tree... if holding_mntlock is
6216 * set, then we're ready to go...
6217 * otherwise we
6218 * first need to drop the iocounts
6219 * we picked up, second take the
6220 * lock to serialize the access,
6221 * then finally start the lookup
6222 * process over with the lock held
6223 */
6224 if (!holding_mntlock) {
6225 /*
6226 * need to grab a reference on
6227 * the mount point before we
6228 * drop all the iocounts... once
6229 * the iocounts are gone, the mount
6230 * could follow
6231 */
6232 locked_mp = fvp->v_mount;
6233 mount_ref(locked_mp, 0);
6234
6235 /*
6236 * nameidone has to happen before we vnode_put(tvp)
6237 * since it may need to release the fs_nodelock on the tvp
6238 */
6239 nameidone(tond);
6240
6241 if (tvp)
6242 vnode_put(tvp);
6243 vnode_put(tdvp);
6244
6245 /*
6246 * nameidone has to happen before we vnode_put(fdvp)
6247 * since it may need to release the fs_nodelock on the fvp
6248 */
6249 nameidone(fromnd);
6250
6251 vnode_put(fvp);
6252 vnode_put(fdvp);
6253
6254 mount_lock_renames(locked_mp);
6255 holding_mntlock = 1;
6256
6257 goto retry;
6258 }
6259 } else {
6260 /*
6261 * when we dropped the iocounts to take
6262 * the lock, we allowed the identity of
6263 * the various vnodes to change... if they did,
6264 * we may no longer be dealing with a rename
6265 * that reshapes the tree... once we're holding
6266 * the iocounts, the vnodes can't change type
6267 * so we're free to drop the lock at this point
6268 * and continue on
6269 */
6270 if (holding_mntlock) {
6271 mount_unlock_renames(locked_mp);
6272 mount_drop(locked_mp, 0);
6273 holding_mntlock = 0;
6274 }
6275 }
6276
6277 // save these off so we can later verify that fvp is the same
6278 oname = fvp->v_name;
6279 oparent = fvp->v_parent;
6280
6281 skipped_lookup:
6282 #if CONFIG_FSE
6283 need_event = need_fsevent(FSE_RENAME, fdvp);
6284 if (need_event) {
6285 if (fvp) {
6286 get_fse_info(fvp, &from_finfo, ctx);
6287 } else {
6288 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6289 if (error) {
6290 goto out1;
6291 }
6292
6293 fvap = &__rename_data->fv_attr;
6294 }
6295
6296 if (tvp) {
6297 get_fse_info(tvp, &to_finfo, ctx);
6298 } else if (batched) {
6299 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6300 if (error) {
6301 goto out1;
6302 }
6303
6304 tvap = &__rename_data->tv_attr;
6305 }
6306 }
6307 #else
6308 need_event = 0;
6309 #endif /* CONFIG_FSE */
6310
6311 if (need_event || kauth_authorize_fileop_has_listeners()) {
6312 if (from_name == NULL) {
6313 GET_PATH(from_name);
6314 if (from_name == NULL) {
6315 error = ENOMEM;
6316 goto out1;
6317 }
6318 }
6319
6320 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
6321
6322 if (to_name == NULL) {
6323 GET_PATH(to_name);
6324 if (to_name == NULL) {
6325 error = ENOMEM;
6326 goto out1;
6327 }
6328 }
6329
6330 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
6331 }
6332
6333 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6334 tdvp, &tvp, &tond->ni_cnd, tvap,
6335 0, ctx);
6336
6337 if (holding_mntlock) {
6338 /*
6339 * we can drop our serialization
6340 * lock now
6341 */
6342 mount_unlock_renames(locked_mp);
6343 mount_drop(locked_mp, 0);
6344 holding_mntlock = 0;
6345 }
6346 if (error) {
6347 if (error == EKEEPLOOKING) {
6348 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6349 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6350 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6351 }
6352 }
6353
6354 fromnd->ni_vp = fvp;
6355 tond->ni_vp = tvp;
6356
6357 goto continue_lookup;
6358 }
6359
6360 /*
6361 * We may encounter a race in the VNOP where the destination didn't
6362 * exist when we did the namei, but it does by the time we go and
6363 * try to create the entry. In this case, we should re-drive this rename
6364 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6365 * but other filesystems susceptible to this race could return it, too.
6366 */
6367 if (error == ERECYCLE) {
6368 do_retry = 1;
6369 }
6370
6371 goto out1;
6372 }
6373
6374 /* call out to allow 3rd party notification of rename.
6375 * Ignore result of kauth_authorize_fileop call.
6376 */
6377 kauth_authorize_fileop(vfs_context_ucred(ctx),
6378 KAUTH_FILEOP_RENAME,
6379 (uintptr_t)from_name, (uintptr_t)to_name);
6380
6381 #if CONFIG_FSE
6382 if (from_name != NULL && to_name != NULL) {
6383 if (from_truncated || to_truncated) {
6384 // set it here since only the from_finfo gets reported up to user space
6385 from_finfo.mode |= FSE_TRUNCATED_PATH;
6386 }
6387
6388 if (tvap && tvp) {
6389 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
6390 }
6391 if (fvap) {
6392 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
6393 }
6394
6395 if (tvp) {
6396 add_fsevent(FSE_RENAME, ctx,
6397 FSE_ARG_STRING, from_len, from_name,
6398 FSE_ARG_FINFO, &from_finfo,
6399 FSE_ARG_STRING, to_len, to_name,
6400 FSE_ARG_FINFO, &to_finfo,
6401 FSE_ARG_DONE);
6402 } else {
6403 add_fsevent(FSE_RENAME, ctx,
6404 FSE_ARG_STRING, from_len, from_name,
6405 FSE_ARG_FINFO, &from_finfo,
6406 FSE_ARG_STRING, to_len, to_name,
6407 FSE_ARG_DONE);
6408 }
6409 }
6410 #endif /* CONFIG_FSE */
6411
6412 /*
6413 * update filesystem's mount point data
6414 */
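/*
 * Hypothetical illustration of the block below: if a volume mounted on
 * "/Volumes/Backup" is renamed to "/Volumes/Backup2", pathend is left
 * pointing just past the last '/' of f_mntonname, mpname points at the
 * final component ("Backup2") of the copied-in target path, and the
 * strlcpy() rewrites f_mntonname to "/Volumes/Backup2".
 */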
6415 if (mntrename) {
6416 char *cp, *pathend, *mpname;
6417 char * tobuf;
6418 struct mount *mp;
6419 int maxlen;
6420 size_t len = 0;
6421
6422 mp = fvp->v_mountedhere;
6423
6424 if (vfs_busy(mp, LK_NOWAIT)) {
6425 error = EBUSY;
6426 goto out1;
6427 }
6428 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
6429
6430 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
6431 if (!error) {
6432 /* find current mount point prefix */
6433 pathend = &mp->mnt_vfsstat.f_mntonname[0];
6434 for (cp = pathend; *cp != '\0'; ++cp) {
6435 if (*cp == '/')
6436 pathend = cp + 1;
6437 }
6438 /* find last component of target name */
6439 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
6440 if (*cp == '/')
6441 mpname = cp + 1;
6442 }
6443 /* append name to prefix */
6444 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
6445 bzero(pathend, maxlen);
6446 strlcpy(pathend, mpname, maxlen);
6447 }
6448 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
6449
6450 vfs_unbusy(mp);
6451 }
6452 /*
6453 * fix up name & parent pointers. note that we first
6454 * check that fvp has the same name/parent pointers it
6455 * had before the rename call... this is a 'weak' check
6456 * at best...
6457 *
6458 * XXX oparent and oname may not be set in the compound vnop case
6459 */
6460 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
6461 int update_flags;
6462
6463 update_flags = VNODE_UPDATE_NAME;
6464
6465 if (fdvp != tdvp)
6466 update_flags |= VNODE_UPDATE_PARENT;
6467
6468 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
6469 }
6470 out1:
6471 if (to_name != NULL) {
6472 RELEASE_PATH(to_name);
6473 to_name = NULL;
6474 }
6475 if (from_name != NULL) {
6476 RELEASE_PATH(from_name);
6477 from_name = NULL;
6478 }
6479 if (holding_mntlock) {
6480 mount_unlock_renames(locked_mp);
6481 mount_drop(locked_mp, 0);
6482 holding_mntlock = 0;
6483 }
6484 if (tdvp) {
6485 /*
6486 * nameidone has to happen before we vnode_put(tdvp)
6487 * since it may need to release the fs_nodelock on the tdvp
6488 */
6489 nameidone(tond);
6490
6491 if (tvp)
6492 vnode_put(tvp);
6493 vnode_put(tdvp);
6494 }
6495 if (fdvp) {
6496 /*
6497 * nameidone has to happen before we vnode_put(fdvp)
6498 * since it may need to release the fs_nodelock on the fdvp
6499 */
6500 nameidone(fromnd);
6501
6502 if (fvp)
6503 vnode_put(fvp);
6504 vnode_put(fdvp);
6505 }
6506
6507
6508 /*
6509 * If things changed after we did the namei, then we will re-drive
6510 * this rename call from the top.
6511 */
6512 if (do_retry) {
6513 do_retry = 0;
6514 goto retry;
6515 }
6516
6517 FREE(__rename_data, M_TEMP);
6518 return (error);
6519 }
6520
6521 /*
6522 * Make a directory file.
6523 *
6524 * Returns: 0 Success
6525 * EEXIST
6526 * namei:???
6527 * vnode_authorize:???
6528 * vn_create:???
6529 */
6530 /* ARGSUSED */
6531 static int
6532 mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
6533 {
6534 vnode_t vp, dvp;
6535 int error;
6536 int update_flags = 0;
6537 int batched;
6538 struct nameidata nd;
6539
6540 AUDIT_ARG(mode, vap->va_mode);
6541 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE,
6542 path, ctx);
6543 nd.ni_cnd.cn_flags |= WILLBEDIR;
6544 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
6545
6546 continue_lookup:
6547 error = namei(&nd);
6548 if (error)
6549 return (error);
6550 dvp = nd.ni_dvp;
6551 vp = nd.ni_vp;
6552
6553 if (vp != NULL) {
6554 error = EEXIST;
6555 goto out;
6556 }
6557
6558 batched = vnode_compound_mkdir_available(dvp);
6559
6560 VATTR_SET(vap, va_type, VDIR);
6561
6562 /*
6563 * XXX
6564 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
6565 * only get EEXIST or EISDIR for existing path components, and not that it could see
6566 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
6567 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
6568 */
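/*
 * Hypothetical example of the fallback below: mkdir("/tmp/foo") where
 * "/tmp/foo" already exists but the caller lacks write access to "/tmp"
 * may get EACCES from the authorization above; the plain lookup that
 * follows finds the existing entry, so we report EEXIST instead, which
 * "mkdir -p" style callers tolerate.
 */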
6569 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6570 if (error == EACCES || error == EPERM) {
6571 int error2;
6572
6573 nameidone(&nd);
6574 vnode_put(dvp);
6575 dvp = NULLVP;
6576
6577 /*
6578 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6579 * rather than EACCES if the target exists.
6580 */
6581 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, UIO_USERSPACE,
6582 path, ctx);
6583 error2 = namei(&nd);
6584 if (error2) {
6585 goto out;
6586 } else {
6587 vp = nd.ni_vp;
6588 error = EEXIST;
6589 goto out;
6590 }
6591 }
6592
6593 goto out;
6594 }
6595
6596 /*
6597 * make the directory
6598 */
6599 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6600 if (error == EKEEPLOOKING) {
6601 nd.ni_vp = vp;
6602 goto continue_lookup;
6603 }
6604
6605 goto out;
6606 }
6607
6608 // Make sure the name & parent pointers are hooked up
6609 if (vp->v_name == NULL)
6610 update_flags |= VNODE_UPDATE_NAME;
6611 if (vp->v_parent == NULLVP)
6612 update_flags |= VNODE_UPDATE_PARENT;
6613
6614 if (update_flags)
6615 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
6616
6617 #if CONFIG_FSE
6618 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
6619 #endif
6620
6621 out:
6622 /*
6623 * nameidone has to happen before we vnode_put(dvp)
6624 * since it may need to release the fs_nodelock on the dvp
6625 */
6626 nameidone(&nd);
6627
6628 if (vp)
6629 vnode_put(vp);
6630 if (dvp)
6631 vnode_put(dvp);
6632
6633 return (error);
6634 }
6635
6636 /*
6637 * mkdir_extended: Create a directory; with extended security (ACL).
6638 *
6639 * Parameters: p Process requesting to create the directory
6640 * uap User argument descriptor (see below)
6641 * retval (ignored)
6642 *
6643 * Indirect: uap->path Path of directory to create
6644 * uap->mode Access permissions to set
6645 * uap->xsecurity ACL to set
6646 *
6647 * Returns: 0 Success
6648 * !0 Not success
6649 *
6650 */
6651 int
6652 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
6653 {
6654 int ciferror;
6655 kauth_filesec_t xsecdst;
6656 struct vnode_attr va;
6657
6658 AUDIT_ARG(owner, uap->uid, uap->gid);
6659
6660 xsecdst = NULL;
6661 if ((uap->xsecurity != USER_ADDR_NULL) &&
6662 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
6663 return ciferror;
6664
6665 VATTR_INIT(&va);
6666 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6667 if (xsecdst != NULL)
6668 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6669
6670 ciferror = mkdir1(vfs_context_current(), uap->path, &va);
6671 if (xsecdst != NULL)
6672 kauth_filesec_free(xsecdst);
6673 return ciferror;
6674 }
6675
6676 int
6677 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
6678 {
6679 struct vnode_attr va;
6680
6681 VATTR_INIT(&va);
6682 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6683
6684 return(mkdir1(vfs_context_current(), uap->path, &va));
6685 }
6686
6687 /*
6688 * Remove a directory file.
6689 */
6690 /* ARGSUSED */
6691 int
6692 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
6693 {
6694 vnode_t vp, dvp;
6695 int error;
6696 struct nameidata nd;
6697 char *path = NULL;
6698 int len=0;
6699 int has_listeners = 0;
6700 int need_event = 0;
6701 int truncated = 0;
6702 vfs_context_t ctx = vfs_context_current();
6703 #if CONFIG_FSE
6704 struct vnode_attr va;
6705 #endif /* CONFIG_FSE */
6706 struct vnode_attr *vap = NULL;
6707 int batched;
6708
6709 int restart_flag;
6710
6711 /*
6712 * This loop exists to restart rmdir in the unlikely case that two
6713 * processes are simultaneously trying to remove the same directory
6714 * containing orphaned appleDouble files.
6715 */
6716 do {
6717 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
6718 UIO_USERSPACE, uap->path, ctx);
6719 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
6720 continue_lookup:
6721 restart_flag = 0;
6722 vap = NULL;
6723
6724 error = namei(&nd);
6725 if (error)
6726 return (error);
6727
6728 dvp = nd.ni_dvp;
6729 vp = nd.ni_vp;
6730
6731 if (vp) {
6732 batched = vnode_compound_rmdir_available(vp);
6733
6734 if (vp->v_flag & VROOT) {
6735 /*
6736 * The root of a mounted filesystem cannot be deleted.
6737 */
6738 error = EBUSY;
6739 goto out;
6740 }
6741
6742 /*
6743 * Removed a check here; we used to abort if vp's vid
6744 * was not the same as what we'd seen the last time around.
6745 * I do not think that check was valid, because if we retry
6746 * and all dirents are gone, the directory could legitimately
6747 * be recycled but still be present in a situation where we would
6748 * have had permission to delete. Therefore, we won't make
6749 * an effort to preserve that check now that we may not have a
6750 * vp here.
6751 */
6752
6753 if (!batched) {
6754 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
6755 if (error) {
6756 goto out;
6757 }
6758 }
6759 } else {
6760 batched = 1;
6761
6762 if (!vnode_compound_rmdir_available(dvp)) {
6763 panic("No error, but no compound rmdir?");
6764 }
6765 }
6766
6767 #if CONFIG_FSE
6768 fse_info finfo;
6769
6770 need_event = need_fsevent(FSE_DELETE, dvp);
6771 if (need_event) {
6772 if (!batched) {
6773 get_fse_info(vp, &finfo, ctx);
6774 } else {
6775 error = vfs_get_notify_attributes(&va);
6776 if (error) {
6777 goto out;
6778 }
6779
6780 vap = &va;
6781 }
6782 }
6783 #endif
6784 has_listeners = kauth_authorize_fileop_has_listeners();
6785 if (need_event || has_listeners) {
6786 if (path == NULL) {
6787 GET_PATH(path);
6788 if (path == NULL) {
6789 error = ENOMEM;
6790 goto out;
6791 }
6792 }
6793
6794 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
6795 #if CONFIG_FSE
6796 if (truncated) {
6797 finfo.mode |= FSE_TRUNCATED_PATH;
6798 }
6799 #endif
6800 }
6801
6802 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
6803 nd.ni_vp = vp;
6804 if (vp == NULLVP) {
6805 /* Couldn't find a vnode */
6806 goto out;
6807 }
6808
6809 if (error == EKEEPLOOKING) {
6810 goto continue_lookup;
6811 }
6812 #if CONFIG_APPLEDOUBLE
6813 /*
6814 * Special case to remove orphaned AppleDouble
6815 * files. I don't like putting this in the kernel,
6816 * but carbon does not like putting this in carbon either,
6817 * so here we are.
6818 */
6819 if (error == ENOTEMPTY) {
6820 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
6821 if (error == EBUSY) {
6822 goto out;
6823 }
6824
6825
6826 /*
6827 * Assuming everything went well, we will try the RMDIR again
6828 */
6829 if (!error)
6830 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
6831 }
6832 #endif /* CONFIG_APPLEDOUBLE */
6833 /*
6834 * Call out to allow 3rd party notification of delete.
6835 * Ignore result of kauth_authorize_fileop call.
6836 */
6837 if (!error) {
6838 if (has_listeners) {
6839 kauth_authorize_fileop(vfs_context_ucred(ctx),
6840 KAUTH_FILEOP_DELETE,
6841 (uintptr_t)vp,
6842 (uintptr_t)path);
6843 }
6844
6845 if (vp->v_flag & VISHARDLINK) {
6846 // see the comment in unlink1() about why we update
6847 // the parent of a hard link when it is removed
6848 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
6849 }
6850
6851 #if CONFIG_FSE
6852 if (need_event) {
6853 if (vap) {
6854 vnode_get_fse_info_from_vap(vp, &finfo, vap);
6855 }
6856 add_fsevent(FSE_DELETE, ctx,
6857 FSE_ARG_STRING, len, path,
6858 FSE_ARG_FINFO, &finfo,
6859 FSE_ARG_DONE);
6860 }
6861 #endif
6862 }
6863
6864 out:
6865 if (path != NULL) {
6866 RELEASE_PATH(path);
6867 path = NULL;
6868 }
6869 /*
6870 * nameidone has to happen before we vnode_put(dvp)
6871 * since it may need to release the fs_nodelock on the dvp
6872 */
6873 nameidone(&nd);
6874 vnode_put(dvp);
6875
6876 if (vp)
6877 vnode_put(vp);
6878
6879 if (restart_flag == 0) {
6880 wakeup_one((caddr_t)vp);
6881 return (error);
6882 }
6883 tsleep(vp, PVFS, "rm AD", 1);
6884
6885 } while (restart_flag != 0);
6886
6887 return (error);
6888
6889 }
6890
6891 /* Get direntry length padded to 8 byte alignment */
6892 #define DIRENT64_LEN(namlen) \
6893 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
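/*
 * Illustrative note: the macro keeps the fixed portion of struct direntry
 * plus namlen name bytes and a terminating NUL (the "- (MAXPATHLEN-1)"
 * discards the unused remainder of d_name), rounded up to a multiple of 8.
 * For a 3-character name this rounds to the 32-byte worst case cited in the
 * buffer-sizing comment in vnode_readdir64() below.
 */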
6894
6895 static errno_t
6896 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
6897 int *numdirent, vfs_context_t ctxp)
6898 {
6899 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
6900 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
6901 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
6902 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
6903 } else {
6904 size_t bufsize;
6905 void * bufptr;
6906 uio_t auio;
6907 struct direntry *entry64;
6908 struct dirent *dep;
6909 int bytesread;
6910 int error;
6911
6912 /*
6913 * Our kernel buffer needs to be smaller since re-packing
6914 * will expand each dirent. The worst case (when the name
6915 * length is 3) corresponds to a struct direntry size of 32
6916 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
6917 * (4-byte aligned). So having a buffer that is 3/8 the size
6918 * will prevent us from reading more than we can pack.
6919 *
6920 * Since this buffer is wired memory, we will limit the
6921 * buffer size to a maximum of 32K. We would really like to
6922 * use 32K in the MIN(), but we use magic number 87371 to
6923 * prevent uio_resid() * 3 / 8 from overflowing.
6924 */
6925 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
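/* Concretely, the MIN() caps this at 3 * 87371 / 8 = 32764 bytes, just
 * under the 32K ceiling mentioned above. */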
6926 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
6927 if (bufptr == NULL) {
6928 return ENOMEM;
6929 }
6930
6931 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
6932 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
6933 auio->uio_offset = uio->uio_offset;
6934
6935 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
6936
6937 dep = (struct dirent *)bufptr;
6938 bytesread = bufsize - uio_resid(auio);
6939
6940 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
6941 M_TEMP, M_WAITOK);
6942 /*
6943 * Convert all the entries and copy them out to user's buffer.
6944 */
6945 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
6946 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
6947
6948 bzero(entry64, enbufsize);
6949 /* Convert a dirent to a dirent64. */
6950 entry64->d_ino = dep->d_ino;
6951 entry64->d_seekoff = 0;
6952 entry64->d_reclen = enbufsize;
6953 entry64->d_namlen = dep->d_namlen;
6954 entry64->d_type = dep->d_type;
6955 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
6956
6957 /* Move to next entry. */
6958 dep = (struct dirent *)((char *)dep + dep->d_reclen);
6959
6960 /* Copy entry64 to user's buffer. */
6961 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
6962 }
6963
6964 /* Update the real offset using the offset we got from VNOP_READDIR. */
6965 if (error == 0) {
6966 uio->uio_offset = auio->uio_offset;
6967 }
6968 uio_free(auio);
6969 FREE(bufptr, M_TEMP);
6970 FREE(entry64, M_TEMP);
6971 return (error);
6972 }
6973 }
6974
6975 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
6976
6977 /*
6978 * Read a block of directory entries in a file system independent format.
6979 */
6980 static int
6981 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
6982 off_t *offset, int flags)
6983 {
6984 vnode_t vp;
6985 struct vfs_context context = *vfs_context_current(); /* local copy */
6986 struct fileproc *fp;
6987 uio_t auio;
6988 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6989 off_t loff;
6990 int error, eofflag, numdirent;
6991 char uio_buf[ UIO_SIZEOF(1) ];
6992
6993 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
6994 if (error) {
6995 return (error);
6996 }
6997 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6998 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6999 error = EBADF;
7000 goto out;
7001 }
7002
7003 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
7004 bufsize = GETDIRENTRIES_MAXBUFSIZE;
7005
7006 #if CONFIG_MACF
7007 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
7008 if (error)
7009 goto out;
7010 #endif
7011 if ( (error = vnode_getwithref(vp)) ) {
7012 goto out;
7013 }
7014 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7015
7016 unionread:
7017 if (vp->v_type != VDIR) {
7018 (void)vnode_put(vp);
7019 error = EINVAL;
7020 goto out;
7021 }
7022
7023 #if CONFIG_MACF
7024 error = mac_vnode_check_readdir(&context, vp);
7025 if (error != 0) {
7026 (void)vnode_put(vp);
7027 goto out;
7028 }
7029 #endif /* MAC */
7030
7031 loff = fp->f_fglob->fg_offset;
7032 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7033 uio_addiov(auio, bufp, bufsize);
7034
7035 if (flags & VNODE_READDIR_EXTENDED) {
7036 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
7037 fp->f_fglob->fg_offset = uio_offset(auio);
7038 } else {
7039 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
7040 fp->f_fglob->fg_offset = uio_offset(auio);
7041 }
7042 if (error) {
7043 (void)vnode_put(vp);
7044 goto out;
7045 }
7046
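/*
 * If nothing was read (the residual count is unchanged), we may have hit
 * the end of the top layer of a union mount: give union_dircheckp a chance
 * to substitute a vnode, or drop down to the covered directory, reset the
 * offset and restart the read there.
 */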
7047 if ((user_ssize_t)bufsize == uio_resid(auio)){
7048 if (union_dircheckp) {
7049 error = union_dircheckp(&vp, fp, &context);
7050 if (error == -1)
7051 goto unionread;
7052 if (error)
7053 goto out;
7054 }
7055
7056 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
7057 struct vnode *tvp = vp;
7058 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
7059 vnode_ref(vp);
7060 fp->f_fglob->fg_data = (caddr_t) vp;
7061 fp->f_fglob->fg_offset = 0;
7062 vnode_rele(tvp);
7063 vnode_put(tvp);
7064 goto unionread;
7065 }
7066 vp = tvp;
7067 }
7068 }
7069
7070 vnode_put(vp);
7071 if (offset) {
7072 *offset = loff;
7073 }
7074
7075 *bytesread = bufsize - uio_resid(auio);
7076 out:
7077 file_drop(fd);
7078 return (error);
7079 }
7080
7081
7082 int
7083 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
7084 {
7085 off_t offset;
7086 ssize_t bytesread;
7087 int error;
7088
7089 AUDIT_ARG(fd, uap->fd);
7090 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7091
7092 if (error == 0) {
7093 if (proc_is64bit(p)) {
7094 user64_long_t base = (user64_long_t)offset;
7095 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7096 } else {
7097 user32_long_t base = (user32_long_t)offset;
7098 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7099 }
7100 *retval = bytesread;
7101 }
7102 return (error);
7103 }
7104
7105 int
7106 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7107 {
7108 off_t offset;
7109 ssize_t bytesread;
7110 int error;
7111
7112 AUDIT_ARG(fd, uap->fd);
7113 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7114
7115 if (error == 0) {
7116 *retval = bytesread;
7117 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7118 }
7119 return (error);
7120 }
7121
7122
7123 /*
7124 * Set the mode mask for creation of filesystem nodes.
7125 * XXX implement xsecurity
7126 */
7127 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7128 static int
7129 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
7130 {
7131 struct filedesc *fdp;
7132
7133 AUDIT_ARG(mask, newmask);
7134 proc_fdlock(p);
7135 fdp = p->p_fd;
7136 *retval = fdp->fd_cmask;
7137 fdp->fd_cmask = newmask & ALLPERMS;
7138 proc_fdunlock(p);
7139 return (0);
7140 }
7141
7142 /*
7143 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7144 *
7145 * Parameters: p Process requesting to set the umask
7146 * uap User argument descriptor (see below)
7147 * retval umask of the process (parameter p)
7148 *
7149 * Indirect: uap->newmask umask to set
7150 * uap->xsecurity ACL to set
7151 *
7152 * Returns: 0 Success
7153 * !0 Not success
7154 *
7155 */
7156 int
7157 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
7158 {
7159 int ciferror;
7160 kauth_filesec_t xsecdst;
7161
7162 xsecdst = KAUTH_FILESEC_NONE;
7163 if (uap->xsecurity != USER_ADDR_NULL) {
7164 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7165 return ciferror;
7166 } else {
7167 xsecdst = KAUTH_FILESEC_NONE;
7168 }
7169
7170 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7171
7172 if (xsecdst != KAUTH_FILESEC_NONE)
7173 kauth_filesec_free(xsecdst);
7174 return ciferror;
7175 }
7176
7177 int
7178 umask(proc_t p, struct umask_args *uap, int32_t *retval)
7179 {
7180 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7181 }
7182
7183 /*
7184 * Void all references to file by ripping underlying filesystem
7185 * away from vnode.
7186 */
7187 /* ARGSUSED */
7188 int
7189 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
7190 {
7191 vnode_t vp;
7192 struct vnode_attr va;
7193 vfs_context_t ctx = vfs_context_current();
7194 int error;
7195 struct nameidata nd;
7196
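/*
 * Only character and block special files may be revoked; a block device
 * with a filesystem mounted on it is reported busy, and the caller must
 * either own the vnode or pass the superuser check before VNOP_REVOKE()
 * is issued.
 */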
7197 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
7198 uap->path, ctx);
7199 error = namei(&nd);
7200 if (error)
7201 return (error);
7202 vp = nd.ni_vp;
7203
7204 nameidone(&nd);
7205
7206 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
7207 error = ENOTSUP;
7208 goto out;
7209 }
7210
7211 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
7212 error = EBUSY;
7213 goto out;
7214 }
7215
7216 #if CONFIG_MACF
7217 error = mac_vnode_check_revoke(ctx, vp);
7218 if (error)
7219 goto out;
7220 #endif
7221
7222 VATTR_INIT(&va);
7223 VATTR_WANTED(&va, va_uid);
7224 if ((error = vnode_getattr(vp, &va, ctx)))
7225 goto out;
7226 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
7227 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
7228 goto out;
7229 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
7230 VNOP_REVOKE(vp, REVOKEALL, ctx);
7231 out:
7232 vnode_put(vp);
7233 return (error);
7234 }
7235
7236
7237 /*
7238 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
7239 * The following system calls are designed to support features
7240 * which are specific to the HFS & HFS Plus volume formats
7241 */
7242
7243
7244 /*
7245 * Obtain attribute information on objects in a directory while enumerating
7246 * the directory.
7247 */
7248 /* ARGSUSED */
7249 int
7250 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
7251 {
7252 vnode_t vp;
7253 struct fileproc *fp;
7254 uio_t auio = NULL;
7255 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7256 uint32_t count, savecount;
7257 uint32_t newstate;
7258 int error, eofflag;
7259 uint32_t loff;
7260 struct attrlist attributelist;
7261 vfs_context_t ctx = vfs_context_current();
7262 int fd = uap->fd;
7263 char uio_buf[ UIO_SIZEOF(1) ];
7264 kauth_action_t action;
7265
7266 AUDIT_ARG(fd, fd);
7267
7268 /* Get the attributes into kernel space */
7269 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
7270 return(error);
7271 }
7272 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
7273 return(error);
7274 }
7275 savecount = count;
7276 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
7277 return (error);
7278 }
7279 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7280 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7281 error = EBADF;
7282 goto out;
7283 }
7284
7285
7286 #if CONFIG_MACF
7287 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
7288 fp->f_fglob);
7289 if (error)
7290 goto out;
7291 #endif
7292
7293
7294 if ( (error = vnode_getwithref(vp)) )
7295 goto out;
7296
7297 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7298
7299 unionread:
7300 if (vp->v_type != VDIR) {
7301 (void)vnode_put(vp);
7302 error = EINVAL;
7303 goto out;
7304 }
7305
7306 #if CONFIG_MACF
7307 error = mac_vnode_check_readdir(ctx, vp);
7308 if (error != 0) {
7309 (void)vnode_put(vp);
7310 goto out;
7311 }
7312 #endif /* MAC */
7313
7314 /* set up the uio structure which will contain the user's return buffer */
7315 loff = fp->f_fglob->fg_offset;
7316 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7317 uio_addiov(auio, uap->buffer, uap->buffersize);
7318
7319 /*
7320 * If the only item requested is file names, we can let that past with
7321 * just LIST_DIRECTORY. If they want any other attributes, that means
7322 * they need SEARCH as well.
7323 */
7324 action = KAUTH_VNODE_LIST_DIRECTORY;
7325 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
7326 attributelist.fileattr || attributelist.dirattr)
7327 action |= KAUTH_VNODE_SEARCH;
7328
7329 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
7330
7331 /* Believe it or not, uap->options only has 32 bits of valid
7332 * info, so truncate before extending again */
7333
7334 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
7335 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
7336 }
7337
7338 if (error) {
7339 (void) vnode_put(vp);
7340 goto out;
7341 }
7342
7343 /*
7344 * If we've got the last entry of a directory in a union mount
7345 * then reset the eofflag and pretend there's still more to come.
7346 * The next call will again set eofflag and the buffer will be empty,
7347 * so traverse to the underlying directory and do the directory
7348 * read there.
7349 */
7350 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
7351 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
7352 eofflag = 0;
7353 } else { // Empty buffer
7354 struct vnode *tvp = vp;
7355 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
7356 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
7357 fp->f_fglob->fg_data = (caddr_t) vp;
7358 fp->f_fglob->fg_offset = 0; // reset index for new dir
7359 count = savecount;
7360 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
7361 vnode_put(tvp);
7362 goto unionread;
7363 }
7364 vp = tvp;
7365 }
7366 }
7367
7368 (void)vnode_put(vp);
7369
7370 if (error)
7371 goto out;
7372 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
7373
7374 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
7375 goto out;
7376 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
7377 goto out;
7378 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
7379 goto out;
7380
7381 *retval = eofflag; /* similar to getdirentries */
7382 error = 0;
7383 out:
7384 file_drop(fd);
7385 return (error); /* errors were returned earlier; a retval of 0 or 1 now */
7386
7387 } /* end of getdirentriesattr system call */
7388
7389 /*
7390 * Exchange data between two files
7391 */
7392
7393 /* ARGSUSED */
7394 int
7395 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
7396 {
7397
7398 struct nameidata fnd, snd;
7399 vfs_context_t ctx = vfs_context_current();
7400 vnode_t fvp;
7401 vnode_t svp;
7402 int error;
7403 u_int32_t nameiflags;
7404 char *fpath = NULL;
7405 char *spath = NULL;
7406 int flen=0, slen=0;
7407 int from_truncated=0, to_truncated=0;
7408 #if CONFIG_FSE
7409 fse_info f_finfo, s_finfo;
7410 #endif
7411
7412 nameiflags = 0;
7413 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7414
7415 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
7416 UIO_USERSPACE, uap->path1, ctx);
7417
7418 error = namei(&fnd);
7419 if (error)
7420 goto out2;
7421
7422 nameidone(&fnd);
7423 fvp = fnd.ni_vp;
7424
7425 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
7426 UIO_USERSPACE, uap->path2, ctx);
7427
7428 error = namei(&snd);
7429 if (error) {
7430 vnode_put(fvp);
7431 goto out2;
7432 }
7433 nameidone(&snd);
7434 svp = snd.ni_vp;
7435
7436 /*
7437 * if the files are the same, return an inval error
7438 */
7439 if (svp == fvp) {
7440 error = EINVAL;
7441 goto out;
7442 }
7443
7444 /*
7445 * if the files are on different volumes, return an error
7446 */
7447 if (svp->v_mount != fvp->v_mount) {
7448 error = EXDEV;
7449 goto out;
7450 }
7451
7452 /* If they're not files, return an error */
7453 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
7454 error = EINVAL;
7455 goto out;
7456 }
7457
7458 #if CONFIG_MACF
7459 error = mac_vnode_check_exchangedata(ctx,
7460 fvp, svp);
7461 if (error)
7462 goto out;
7463 #endif
7464 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
7465 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
7466 goto out;
7467
7468 if (
7469 #if CONFIG_FSE
7470 need_fsevent(FSE_EXCHANGE, fvp) ||
7471 #endif
7472 kauth_authorize_fileop_has_listeners()) {
7473 GET_PATH(fpath);
7474 GET_PATH(spath);
7475 if (fpath == NULL || spath == NULL) {
7476 error = ENOMEM;
7477 goto out;
7478 }
7479
7480 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
7481 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
7482
7483 #if CONFIG_FSE
7484 get_fse_info(fvp, &f_finfo, ctx);
7485 get_fse_info(svp, &s_finfo, ctx);
7486 if (from_truncated || to_truncated) {
7487 // set it here since only the f_finfo gets reported up to user space
7488 f_finfo.mode |= FSE_TRUNCATED_PATH;
7489 }
7490 #endif
7491 }
7492 /* Ok, make the call */
7493 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
7494
7495 if (error == 0) {
7496 const char *tmpname;
7497
7498 if (fpath != NULL && spath != NULL) {
7499 /* call out to allow 3rd party notification of exchangedata.
7500 * Ignore result of kauth_authorize_fileop call.
7501 */
7502 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
7503 (uintptr_t)fpath, (uintptr_t)spath);
7504 }
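/*
 * After a successful exchange the two vnodes have effectively traded
 * places in the namespace, so swap their cached v_name/v_parent pointers
 * under the name cache lock to keep the cache consistent.
 */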
7505 name_cache_lock();
7506
7507 tmpname = fvp->v_name;
7508 fvp->v_name = svp->v_name;
7509 svp->v_name = tmpname;
7510
7511 if (fvp->v_parent != svp->v_parent) {
7512 vnode_t tmp;
7513
7514 tmp = fvp->v_parent;
7515 fvp->v_parent = svp->v_parent;
7516 svp->v_parent = tmp;
7517 }
7518 name_cache_unlock();
7519
7520 #if CONFIG_FSE
7521 if (fpath != NULL && spath != NULL) {
7522 add_fsevent(FSE_EXCHANGE, ctx,
7523 FSE_ARG_STRING, flen, fpath,
7524 FSE_ARG_FINFO, &f_finfo,
7525 FSE_ARG_STRING, slen, spath,
7526 FSE_ARG_FINFO, &s_finfo,
7527 FSE_ARG_DONE);
7528 }
7529 #endif
7530 }
7531
7532 out:
7533 if (fpath != NULL)
7534 RELEASE_PATH(fpath);
7535 if (spath != NULL)
7536 RELEASE_PATH(spath);
7537 vnode_put(svp);
7538 vnode_put(fvp);
7539 out2:
7540 return (error);
7541 }
7542
7543 /*
7544 * Return (in MB) the amount of free space on the given vnode's volume.
7545 */
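/* f_bavail * f_bsize is the free byte count; shifting right by 20 converts bytes to MB. */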
7546 uint32_t freespace_mb(vnode_t vp);
7547
7548 uint32_t
7549 freespace_mb(vnode_t vp)
7550 {
7551 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
7552 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
7553 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
7554 }
7555
7556 #if CONFIG_SEARCHFS
7557
7558 /* ARGSUSED */
7559
7560 int
7561 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
7562 {
7563 vnode_t vp, tvp;
7564 int i, error=0;
7565 int fserror = 0;
7566 struct nameidata nd;
7567 struct user64_fssearchblock searchblock;
7568 struct searchstate *state;
7569 struct attrlist *returnattrs;
7570 struct timeval timelimit;
7571 void *searchparams1,*searchparams2;
7572 uio_t auio = NULL;
7573 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7574 uint32_t nummatches;
7575 int mallocsize;
7576 uint32_t nameiflags;
7577 vfs_context_t ctx = vfs_context_current();
7578 char uio_buf[ UIO_SIZEOF(1) ];
7579
7580 /* Start by copying in fsearchblock parameter list */
7581 if (IS_64BIT_PROCESS(p)) {
7582 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
7583 timelimit.tv_sec = searchblock.timelimit.tv_sec;
7584 timelimit.tv_usec = searchblock.timelimit.tv_usec;
7585 }
7586 else {
7587 struct user32_fssearchblock tmp_searchblock;
7588
7589 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
7590 // munge into 64-bit version
7591 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
7592 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
7593 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
7594 searchblock.maxmatches = tmp_searchblock.maxmatches;
7595 /*
7596 * These casts are safe. We will promote tv_sec from a 32 bit long to a 64 bit
7597 * long if necessary, and tv_usec is already a signed 32 bit int.
7598 */
7599 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
7600 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
7601 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
7602 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
7603 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
7604 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
7605 searchblock.searchattrs = tmp_searchblock.searchattrs;
7606 }
7607 if (error)
7608 return(error);
7609
7610 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
7611 */
7612 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
7613 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
7614 return(EINVAL);
7615
7616 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
7617 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
7618 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
7619 /* block. */
7620
7621 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
7622 sizeof(struct attrlist) + sizeof(struct searchstate);
7623
7624 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
7625
7626 /* Now set up the various pointers to the correct place in our newly allocated memory */
7627
7628 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
7629 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
7630 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
7631
7632 /* Now copy in the stuff given our local variables. */
7633
7634 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
7635 goto freeandexit;
7636
7637 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
7638 goto freeandexit;
7639
7640 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
7641 goto freeandexit;
7642
7643 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
7644 goto freeandexit;
7645
7646 /*
7647 * When searching a union mount, we need to set the
7648 * start flag at the first call on each layer to
7649 * reset state for the new volume.
7650 */
7651 if (uap->options & SRCHFS_START)
7652 state->ss_union_layer = 0;
7653 else
7654 uap->options |= state->ss_union_flags;
7655 state->ss_union_flags = 0;
7656
7657 /*
7658 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
7659 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
7660 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
7661 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
7662 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
7663 */
7664
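/*
 * Sketch of the expected layout (sizes are illustrative): searchparams1
 * starts with a u_int32_t buffer-length word, followed by an
 * attrreference_t whose attr_dataoffset is relative to the start of that
 * attrreference_t and whose attr_length covers the name bytes. The checks
 * below reject any reference that points outside the copied-in buffer.
 */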
7665 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
7666 attrreference_t* string_ref;
7667 u_int32_t* start_length;
7668 user64_size_t param_length;
7669
7670 /* validate searchparams1 */
7671 param_length = searchblock.sizeofsearchparams1;
7672 /* skip the word that specifies length of the buffer */
7673 start_length= (u_int32_t*) searchparams1;
7674 start_length= start_length+1;
7675 string_ref= (attrreference_t*) start_length;
7676
7677 /* ensure no negative offsets or too big offsets */
7678 if (string_ref->attr_dataoffset < 0 ) {
7679 error = EINVAL;
7680 goto freeandexit;
7681 }
7682 if (string_ref->attr_length > MAXPATHLEN) {
7683 error = EINVAL;
7684 goto freeandexit;
7685 }
7686
7687 /* Check for pointer overflow in the string ref */
7688 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
7689 error = EINVAL;
7690 goto freeandexit;
7691 }
7692
7693 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
7694 error = EINVAL;
7695 goto freeandexit;
7696 }
7697 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
7698 error = EINVAL;
7699 goto freeandexit;
7700 }
7701 }
7702
7703 /* set up the uio structure which will contain the user's return buffer */
7704 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7705 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
7706
7707 nameiflags = 0;
7708 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7709 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
7710 UIO_USERSPACE, uap->path, ctx);
7711
7712 error = namei(&nd);
7713 if (error)
7714 goto freeandexit;
7715 vp = nd.ni_vp;
7716 nameidone(&nd);
7717
7718 /*
7719 * Switch to the root vnode for the volume
7720 */
7721 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
7722 if (error)
7723 goto freeandexit;
7724 vnode_put(vp);
7725 vp = tvp;
7726
7727 /*
7728 * If it's a union mount, the path lookup takes
7729 * us to the top layer. But we may need to descend
7730 * to a lower layer. For non-union mounts the layer
7731 * is always zero.
7732 */
7733 for (i = 0; i < (int) state->ss_union_layer; i++) {
7734 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
7735 break;
7736 tvp = vp;
7737 vp = vp->v_mount->mnt_vnodecovered;
7738 if (vp == NULL) {
7739 vp = tvp;
7740 error = ENOENT;
7741 goto freeandexit;
7742 }
7743 vnode_getwithref(vp);
7744 vnode_put(tvp);
7745 }
7746
7747 #if CONFIG_MACF
7748 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
7749 if (error) {
7750 vnode_put(vp);
7751 goto freeandexit;
7752 }
7753 #endif
7754
7755
7756 /*
7757 * If searchblock.maxmatches == 0, then skip the search. This has happened
7758 * before and sometimes the underlying code doesn't deal with it well.
7759 */
7760 if (searchblock.maxmatches == 0) {
7761 nummatches = 0;
7762 goto saveandexit;
7763 }
7764
7765 /*
7766 * All right, we have everything we need, so let's make that call.
7767 *
7768 * We keep special track of the return value from the file system:
7769 * EAGAIN is an acceptable error condition that shouldn't keep us
7770 * from copying out any results...
7771 */
7772
7773 fserror = VNOP_SEARCHFS(vp,
7774 searchparams1,
7775 searchparams2,
7776 &searchblock.searchattrs,
7777 (u_long)searchblock.maxmatches,
7778 &timelimit,
7779 returnattrs,
7780 &nummatches,
7781 (u_long)uap->scriptcode,
7782 (u_long)uap->options,
7783 auio,
7784 (struct searchstate *) &state->ss_fsstate,
7785 ctx);
7786
7787 /*
7788 * If it's a union mount we need to be called again
7789 * to search the mounted-on filesystem.
7790 */
7791 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
7792 state->ss_union_flags = SRCHFS_START;
7793 state->ss_union_layer++; // search next layer down
7794 fserror = EAGAIN;
7795 }
7796
7797 saveandexit:
7798
7799 vnode_put(vp);
7800
7801 /* Now copy out the stuff that needs copying out. That means the number of matches and the
7802 search state. Everything was already put into the return buffer by the VNOP call. */
7803
7804 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
7805 goto freeandexit;
7806
7807 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
7808 goto freeandexit;
7809
7810 error = fserror;
7811
7812 freeandexit:
7813
7814 FREE(searchparams1,M_TEMP);
7815
7816 return(error);
7817
7818
7819 } /* end of searchfs system call */
7820
7821 #else /* CONFIG_SEARCHFS */
7822
7823 int
7824 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
7825 {
7826 return (ENOTSUP);
7827 }
7828
7829 #endif /* CONFIG_SEARCHFS */
7830
7831
7832 lck_grp_attr_t * nspace_group_attr;
7833 lck_attr_t * nspace_lock_attr;
7834 lck_grp_t * nspace_mutex_group;
7835
7836 lck_mtx_t nspace_handler_lock;
7837 lck_mtx_t nspace_handler_exclusion_lock;
7838
7839 time_t snapshot_timestamp=0;
7840 int nspace_allow_virtual_devs=0;
7841
7842 void nspace_handler_init(void);
7843
7844 typedef struct nspace_item_info {
7845 struct vnode *vp;
7846 void *arg;
7847 uint64_t op;
7848 uint32_t vid;
7849 uint32_t flags;
7850 uint32_t token;
7851 uint32_t refcount;
7852 } nspace_item_info;
7853
7854 #define MAX_NSPACE_ITEMS 128
7855 nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
7856 uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
7857 uint32_t nspace_token_id=0;
7858 uint32_t nspace_handler_timeout = 15; // seconds
7859
7860 #define NSPACE_ITEM_NEW 0x0001
7861 #define NSPACE_ITEM_PROCESSING 0x0002
7862 #define NSPACE_ITEM_DEAD 0x0004
7863 #define NSPACE_ITEM_CANCELLED 0x0008
7864 #define NSPACE_ITEM_DONE 0x0010
7865 #define NSPACE_ITEM_RESET_TIMER 0x0020
7866
7867 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
7868 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
7869 #define NSPACE_ITEM_TRACK_EVENT 0x0100
7870
7871 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT | NSPACE_ITEM_TRACK_EVENT)
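// Rough lifecycle of an nspace_items[] slot, as used below: a slot is filled
// and marked NSPACE_ITEM_NEW by resolve_nspace_item_ext(); the handler side
// (not shown here) presumably moves it to NSPACE_ITEM_PROCESSING and finally
// to NSPACE_ITEM_DONE or NSPACE_ITEM_CANCELLED, while NSPACE_ITEM_RESET_TIMER
// lets the handler extend a waiter's timeout.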
7872
7873 //#pragma optimization_level 0
7874
7875 typedef enum {
7876 NSPACE_HANDLER_NSPACE = 0,
7877 NSPACE_HANDLER_SNAPSHOT = 1,
7878 NSPACE_HANDLER_TRACK = 2,
7879
7880 NSPACE_HANDLER_COUNT,
7881 } nspace_type_t;
7882
7883 typedef struct {
7884 uint64_t handler_tid;
7885 struct proc *handler_proc;
7886 int handler_busy;
7887 } nspace_handler_t;
7888
7889 nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
7890
7891 /* namespace fsctl functions */
7892 static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
7893 static int nspace_item_flags_for_type(nspace_type_t nspace_type);
7894 static int nspace_open_flags_for_type(nspace_type_t nspace_type);
7895 static nspace_type_t nspace_type_for_op(uint64_t op);
7896 static int nspace_is_special_process(struct proc *proc);
7897 static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
7898 static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
7899 static int validate_namespace_args (int is64bit, int size);
7900 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
7901
7902
7903 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
7904 {
7905 switch(nspace_type) {
7906 case NSPACE_HANDLER_NSPACE:
7907 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
7908 case NSPACE_HANDLER_SNAPSHOT:
7909 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
7910 case NSPACE_HANDLER_TRACK:
7911 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_TRACK_EVENT;
7912 default:
7913 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
7914 return 0;
7915 }
7916 }
7917
7918 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
7919 {
7920 switch(nspace_type) {
7921 case NSPACE_HANDLER_NSPACE:
7922 return NSPACE_ITEM_NSPACE_EVENT;
7923 case NSPACE_HANDLER_SNAPSHOT:
7924 return NSPACE_ITEM_SNAPSHOT_EVENT;
7925 case NSPACE_HANDLER_TRACK:
7926 return NSPACE_ITEM_TRACK_EVENT;
7927 default:
7928 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
7929 return 0;
7930 }
7931 }
7932
7933 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
7934 {
7935 switch(nspace_type) {
7936 case NSPACE_HANDLER_NSPACE:
7937 return FREAD | FWRITE | O_EVTONLY;
7938 case NSPACE_HANDLER_SNAPSHOT:
7939 case NSPACE_HANDLER_TRACK:
7940 return FREAD | O_EVTONLY;
7941 default:
7942 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
7943 return 0;
7944 }
7945 }
7946
7947 static inline nspace_type_t nspace_type_for_op(uint64_t op)
7948 {
7949 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
7950 case NAMESPACE_HANDLER_NSPACE_EVENT:
7951 return NSPACE_HANDLER_NSPACE;
7952 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
7953 return NSPACE_HANDLER_SNAPSHOT;
7954 case NAMESPACE_HANDLER_TRACK_EVENT:
7955 return NSPACE_HANDLER_TRACK;
7956 default:
7957 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
7958 return NSPACE_HANDLER_NSPACE;
7959 }
7960 }
7961
7962 static inline int nspace_is_special_process(struct proc *proc)
7963 {
7964 int i;
7965 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
7966 if (proc == nspace_handlers[i].handler_proc)
7967 return 1;
7968 }
7969 return 0;
7970 }
7971
7972 void
7973 nspace_handler_init(void)
7974 {
7975 nspace_lock_attr = lck_attr_alloc_init();
7976 nspace_group_attr = lck_grp_attr_alloc_init();
7977 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
7978 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
7979 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
7980 memset(&nspace_items[0], 0, sizeof(nspace_items));
7981 }
7982
7983 void
7984 nspace_proc_exit(struct proc *p)
7985 {
7986 int i, event_mask = 0;
7987
7988 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
7989 if (p == nspace_handlers[i].handler_proc) {
7990 event_mask |= nspace_item_flags_for_type(i);
7991 nspace_handlers[i].handler_tid = 0;
7992 nspace_handlers[i].handler_proc = NULL;
7993 }
7994 }
7995
7996 if (event_mask == 0) {
7997 return;
7998 }
7999
8000 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
8001 // if this process was the snapshot handler, zero snapshot_timestamp
8002 snapshot_timestamp = 0;
8003 }
8004
8005 //
8006 // unblock anyone that's waiting for the handler that died
8007 //
8008 lck_mtx_lock(&nspace_handler_lock);
8009 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8010 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
8011
8012 if ( nspace_items[i].flags & event_mask ) {
8013
8014 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8015 vnode_lock_spin(nspace_items[i].vp);
8016 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8017 vnode_unlock(nspace_items[i].vp);
8018 }
8019 nspace_items[i].vp = NULL;
8020 nspace_items[i].vid = 0;
8021 nspace_items[i].flags = NSPACE_ITEM_DONE;
8022 nspace_items[i].token = 0;
8023
8024 wakeup((caddr_t)&(nspace_items[i].vp));
8025 }
8026 }
8027 }
8028
8029 wakeup((caddr_t)&nspace_item_idx);
8030 lck_mtx_unlock(&nspace_handler_lock);
8031 }
8032
8033
8034 int
8035 resolve_nspace_item(struct vnode *vp, uint64_t op)
8036 {
8037 return resolve_nspace_item_ext(vp, op, NULL);
8038 }
8039
8040 int
8041 resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
8042 {
8043 int i, error, keep_waiting;
8044 struct timespec ts;
8045 nspace_type_t nspace_type = nspace_type_for_op(op);
8046
8047 // only allow namespace events on regular files, directories and symlinks.
8048 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
8049 return 0;
8050 }
8051
8052 //
8053 // if this is a snapshot event and the vnode is on a
8054 // disk image just pretend nothing happened since any
8055 // change to the disk image will cause the disk image
8056 // itself to get backed up and this avoids multi-way
8057 // deadlocks between the snapshot handler and the ever
8058 // popular diskimages-helper process. the variable
8059 // nspace_allow_virtual_devs allows this behavior to
8060 // be overridden (for use by the Mobile TimeMachine
8061 // testing infrastructure which uses disk images)
8062 //
8063 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
8064 && (vp->v_mount != NULL)
8065 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
8066 && !nspace_allow_virtual_devs) {
8067
8068 return 0;
8069 }
8070
8071 // if (thread_tid(current_thread()) == namespace_handler_tid) {
8072 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8073 return 0;
8074 }
8075
8076 if (nspace_is_special_process(current_proc())) {
8077 return EDEADLK;
8078 }
8079
8080 lck_mtx_lock(&nspace_handler_lock);
8081
8082 retry:
8083 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8084 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
8085 break;
8086 }
8087 }
8088
8089 if (i >= MAX_NSPACE_ITEMS) {
8090 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8091 if (nspace_items[i].flags == 0) {
8092 break;
8093 }
8094 }
8095 } else {
8096 nspace_items[i].refcount++;
8097 }
8098
8099 if (i >= MAX_NSPACE_ITEMS) {
8100 ts.tv_sec = nspace_handler_timeout;
8101 ts.tv_nsec = 0;
8102
8103 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
8104 if (error == 0) {
8105 // an entry got free'd up, go see if we can get a slot
8106 goto retry;
8107 } else {
8108 lck_mtx_unlock(&nspace_handler_lock);
8109 return error;
8110 }
8111 }
8112
8113 //
8114 // if it didn't already exist, add it. if it did exist
8115 // we'll get woken up when someone does a wakeup() on
8116 // the slot in the nspace_items table.
8117 //
8118 if (vp != nspace_items[i].vp) {
8119 nspace_items[i].vp = vp;
8120 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
8121 nspace_items[i].op = op;
8122 nspace_items[i].vid = vnode_vid(vp);
8123 nspace_items[i].flags = NSPACE_ITEM_NEW;
8124 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
8125 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
8126 if (arg) {
8127 vnode_lock_spin(vp);
8128 vp->v_flag |= VNEEDSSNAPSHOT;
8129 vnode_unlock(vp);
8130 }
8131 }
8132
8133 nspace_items[i].token = 0;
8134 nspace_items[i].refcount = 1;
8135
8136 wakeup((caddr_t)&nspace_item_idx);
8137 }
8138
8139 //
8140 // Now go to sleep until the handler does a wakeup on this
8141 // slot in the nspace_items table (or we timeout).
8142 //
8143 keep_waiting = 1;
8144 while(keep_waiting) {
8145 ts.tv_sec = nspace_handler_timeout;
8146 ts.tv_nsec = 0;
8147 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
8148
8149 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
8150 error = 0;
8151 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
8152 error = nspace_items[i].token;
8153 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
8154 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
8155 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
8156 continue;
8157 } else {
8158 error = ETIMEDOUT;
8159 }
8160 } else if (error == 0) {
8161 // hmmm, why did we get woken up?
8162 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
8163 nspace_items[i].token);
8164 }
8165
8166 if (--nspace_items[i].refcount == 0) {
8167 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
8168 nspace_items[i].arg = NULL;
8169 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
8170 nspace_items[i].flags = 0; // this clears it for re-use
8171 }
8172 wakeup(&nspace_token_id);
8173 keep_waiting = 0;
8174 }
8175
8176 lck_mtx_unlock(&nspace_handler_lock);
8177
8178 return error;
8179 }
8180
8181
8182 int
8183 get_nspace_item_status(struct vnode *vp, int32_t *status)
8184 {
8185 int i;
8186
8187 lck_mtx_lock(&nspace_handler_lock);
8188 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8189 if (nspace_items[i].vp == vp) {
8190 break;
8191 }
8192 }
8193
8194 if (i >= MAX_NSPACE_ITEMS) {
8195 lck_mtx_unlock(&nspace_handler_lock);
8196 return ENOENT;
8197 }
8198
8199 *status = nspace_items[i].flags;
8200 lck_mtx_unlock(&nspace_handler_lock);
8201 return 0;
8202 }
8203
8204
8205 #if 0
8206 static int
8207 build_volfs_path(struct vnode *vp, char *path, int *len)
8208 {
8209 struct vnode_attr va;
8210 int ret;
8211
8212 VATTR_INIT(&va);
8213 VATTR_WANTED(&va, va_fsid);
8214 VATTR_WANTED(&va, va_fileid);
8215
8216 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
8217 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
8218 ret = -1;
8219 } else {
8220 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
8221 ret = 0;
8222 }
8223
8224 return ret;
8225 }
8226 #endif
8227
8228 //
8229 // Note: this function does NOT check permissions on all of the
8230 // parent directories leading to this vnode. It should only be
8231 // called on behalf of a root process. Otherwise a process may
8232 // get access to a file because the file itself is readable even
8233 // though its parent directories would prevent access.
8234 //
8235 static int
8236 vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
8237 {
8238 int error, action;
8239
8240 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8241 return error;
8242 }
8243
8244 #if CONFIG_MACF
8245 error = mac_vnode_check_open(ctx, vp, fmode);
8246 if (error)
8247 return error;
8248 #endif
8249
8250 /* compute action to be authorized */
8251 action = 0;
8252 if (fmode & FREAD) {
8253 action |= KAUTH_VNODE_READ_DATA;
8254 }
8255 if (fmode & (FWRITE | O_TRUNC)) {
8256 /*
8257 * If we are writing, appending, and not truncating,
8258 * indicate that we are appending so that if the
8259 * UF_APPEND or SF_APPEND bits are set, we do not deny
8260 * the open.
8261 */
8262 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
8263 action |= KAUTH_VNODE_APPEND_DATA;
8264 } else {
8265 action |= KAUTH_VNODE_WRITE_DATA;
8266 }
8267 }
8268
8269 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
8270 return error;
8271
8272
8273 //
8274 // if the vnode is tagged VOPENEVT and the current process
8275 // has the P_CHECKOPENEVT flag set, then we OR the O_EVTONLY flag
8276 // into the open mode so that this open won't count against
8277 // the vnode when Carbon delete() does a vnode_isinuse() to see
8278 // if a file is currently in use. This allows Spotlight
8279 // importers to avoid interfering with Carbon apps that depend on
8280 // the no-delete-if-busy semantics of Carbon delete().
8281 //
8282 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
8283 fmode |= O_EVTONLY;
8284 }
8285
8286 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
8287 return error;
8288 }
8289 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
8290 VNOP_CLOSE(vp, fmode, ctx);
8291 return error;
8292 }
8293
8294 /* Call out to allow 3rd party notification of open.
8295 * Ignore result of kauth_authorize_fileop call.
8296 */
8297 #if CONFIG_MACF
8298 mac_vnode_notify_open(ctx, vp, fmode);
8299 #endif
8300 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
8301 (uintptr_t)vp, 0);
8302
8303
8304 return 0;
8305 }
8306
8307 static int
8308 wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
8309 {
8310 int i, error=0, unblock=0;
8311 task_t curtask;
8312
8313 lck_mtx_lock(&nspace_handler_exclusion_lock);
8314 if (nspace_handlers[nspace_type].handler_busy) {
8315 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8316 return EBUSY;
8317 }
8318 nspace_handlers[nspace_type].handler_busy = 1;
8319 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8320
8321 /*
8322 * Any process that gets here will be one of the namespace handlers.
8323 * As such, it should be prevented from acquiring DMG vnodes during vnode reclamation,
8324 * since that can cause deadlocks: the namespace handler may prevent
8325 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8326 * process.
8327 */
8328 curtask = current_task();
8329 bsd_set_dependency_capable (curtask);
8330
8331 lck_mtx_lock(&nspace_handler_lock);
8332 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8333 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
8334 nspace_handlers[nspace_type].handler_proc = current_proc();
8335 }
8336
8337 while (error == 0) {
8338
8339 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8340 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8341 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8342 continue;
8343 }
8344 break;
8345 }
8346 }
8347
8348 if (i < MAX_NSPACE_ITEMS) {
8349 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
8350 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
8351 nspace_items[i].token = ++nspace_token_id;
8352
8353 if (nspace_items[i].vp) {
8354 struct fileproc *fp;
8355 int32_t indx, fmode;
8356 struct proc *p = current_proc();
8357 vfs_context_t ctx = vfs_context_current();
8358 struct vnode_attr va;
8359
8360
8361 /*
8362 * Use vnode pointer to acquire a file descriptor for
8363 * hand-off to userland
8364 */
8365 fmode = nspace_open_flags_for_type(nspace_type);
8366 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
8367 if (error) {
8368 unblock = 1;
8369 break;
8370 }
8371 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
8372 if (error) {
8373 unblock = 1;
8374 vnode_put(nspace_items[i].vp);
8375 break;
8376 }
8377
8378 if ((error = falloc(p, &fp, &indx, ctx))) {
8379 vn_close(nspace_items[i].vp, fmode, ctx);
8380 vnode_put(nspace_items[i].vp);
8381 unblock = 1;
8382 break;
8383 }
8384
8385 fp->f_fglob->fg_flag = fmode;
8386 fp->f_fglob->fg_ops = &vnops;
8387 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
8388
8389 proc_fdlock(p);
8390 procfdtbl_releasefd(p, indx, NULL);
8391 fp_drop(p, indx, fp, 1);
8392 proc_fdunlock(p);
8393
8394 /*
8395 * All variants of the namespace handler struct support these three fields:
8396 * token, flags, and the FD pointer
8397 */
8398 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
8399 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
8400 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
8401
8402 /*
8403 * Handle optional fields:
8404 * the extended version supports an info ptr (offset, length), and the
8405 *
8406 * namedata version supports a unique per-link object ID
8407 *
8408 */
8409 if (nhd->infoptr) {
8410 uio_t uio = (uio_t)nspace_items[i].arg;
8411 uint64_t u_offset, u_length;
8412
8413 if (uio) {
8414 u_offset = uio_offset(uio);
8415 u_length = uio_resid(uio);
8416 } else {
8417 u_offset = 0;
8418 u_length = 0;
8419 }
8420 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
8421 error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
8422 }
8423
8424 if (nhd->objid) {
8425 VATTR_INIT(&va);
8426 VATTR_WANTED(&va, va_linkid);
8427 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
8428 if (error == 0 ) {
8429 uint64_t linkid = 0;
8430 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
8431 linkid = (uint64_t)va.va_linkid;
8432 }
8433 error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
8434 }
8435 }
8436
8437 if (error) {
8438 vn_close(nspace_items[i].vp, fmode, ctx);
8439 fp_free(p, indx, fp);
8440 unblock = 1;
8441 }
8442
8443 vnode_put(nspace_items[i].vp);
8444
8445 break;
8446 } else {
8447 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
8448 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
8449 }
8450
8451 } else {
8452 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
8453 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8454 error = EINVAL;
8455 break;
8456 }
8457
8458 }
8459 }
8460
8461 if (unblock) {
8462 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8463 vnode_lock_spin(nspace_items[i].vp);
8464 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8465 vnode_unlock(nspace_items[i].vp);
8466 }
8467 nspace_items[i].vp = NULL;
8468 nspace_items[i].vid = 0;
8469 nspace_items[i].flags = NSPACE_ITEM_DONE;
8470 nspace_items[i].token = 0;
8471
8472 wakeup((caddr_t)&(nspace_items[i].vp));
8473 }
8474
8475 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
8476 // just go through every snapshot event and unblock it immediately.
8477 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8478 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8479 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8480 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8481 nspace_items[i].vp = NULL;
8482 nspace_items[i].vid = 0;
8483 nspace_items[i].flags = NSPACE_ITEM_DONE;
8484 nspace_items[i].token = 0;
8485
8486 wakeup((caddr_t)&(nspace_items[i].vp));
8487 }
8488 }
8489 }
8490 }
8491 }
8492
8493 lck_mtx_unlock(&nspace_handler_lock);
8494
8495 lck_mtx_lock(&nspace_handler_exclusion_lock);
8496 nspace_handlers[nspace_type].handler_busy = 0;
8497 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8498
8499 return error;
8500 }
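
/*
 * A rough sketch of the hand-off protocol implemented by the routine above and
 * by the FSCTL_NAMESPACE_HANDLER_* cases in fsctl_internal() below (all names
 * are taken from this file):
 *
 *   1. A privileged handler process issues FSCTL_NAMESPACE_HANDLER_GET (or one
 *      of the snapshot/tracked-file variants).  process_namespace_fsctl()
 *      copies in the userland pointers and calls wait_for_namespace_event(),
 *      which sleeps on nspace_item_idx until resolve_nspace_item() queues an
 *      item.
 *   2. When an item arrives, the kernel opens the target vnode, wires it to a
 *      fresh file descriptor, and copies the token, op/flags and fd (plus the
 *      optional offset/length pair and per-link object ID) out to the handler.
 *   3. After servicing the file, the handler issues
 *      FSCTL_NAMESPACE_HANDLER_UNBLOCK with the token, which wakes the thread
 *      sleeping on nspace_items[i].vp; FSCTL_NAMESPACE_HANDLER_UPDATE instead
 *      resets that waiter's timeout, and FSCTL_NAMESPACE_HANDLER_CANCEL fails
 *      the request.
 */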
8501
8502 static inline int validate_namespace_args (int is64bit, int size) {
8503
8504 if (is64bit) {
8505 /* Must be one of these */
8506 if (size == sizeof(user64_namespace_handler_info)) {
8507 goto sizeok;
8508 }
8509 if (size == sizeof(user64_namespace_handler_info_ext)) {
8510 goto sizeok;
8511 }
8512 if (size == sizeof(user64_namespace_handler_data)) {
8513 goto sizeok;
8514 }
8515 return EINVAL;
8516 }
8517 else {
8518 /* 32 bit -- must be one of these */
8519 if (size == sizeof(user32_namespace_handler_info)) {
8520 goto sizeok;
8521 }
8522 if (size == sizeof(user32_namespace_handler_info_ext)) {
8523 goto sizeok;
8524 }
8525 if (size == sizeof(user32_namespace_handler_data)) {
8526 goto sizeok;
8527 }
8528 return EINVAL;
8529 }
8530
8531 sizeok:
8532
8533 return 0;
8534
8535 }
8536
8537 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
8538 {
8539 int error = 0;
8540 namespace_handler_data nhd;
8541
8542 bzero (&nhd, sizeof(namespace_handler_data));
8543
8544 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
8545 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8546 return EINVAL;
8547 }
8548
8549 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8550 return error;
8551 }
8552
8553 error = validate_namespace_args (is64bit, size);
8554 if (error) {
8555 return error;
8556 }
8557
8558 /* Copy in the userland pointers into our kernel-only struct */
8559
8560 if (is64bit) {
8561 /* 64 bit userland structures */
8562 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
8563 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
8564 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
8565
8566 /* If the size is greater than the standard info struct, add in extra fields */
8567 if (size > (sizeof(user64_namespace_handler_info))) {
8568 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
8569 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
8570 }
8571 if (size == (sizeof(user64_namespace_handler_data))) {
8572 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
8573 }
8574 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
8575 }
8576 }
8577 else {
8578 /* 32 bit userland structures */
8579 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
8580 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
8581 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
8582
8583 if (size > (sizeof(user32_namespace_handler_info))) {
8584 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
8585 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
8586 }
8587 if (size == (sizeof(user32_namespace_handler_data))) {
8588 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
8589 }
8590 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
8591 }
8592 }
8593
8594 return wait_for_namespace_event(&nhd, nspace_type);
8595 }
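
/*
 * For orientation: the userland argument to this fsctl is one of three layouts
 * whose exact definitions live in the namespace-handler headers (not shown
 * here).  Judging from the copy-in code above, they nest as follows, each
 * field being a user pointer the kernel writes through:
 *
 *     namespace_handler_info      { token, flags, fdptr }
 *     namespace_handler_info_ext  = info + { infoptr }   (offset/length pair)
 *     namespace_handler_data      = info_ext + { objid } (per-link object ID)
 */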
8596
8597 /*
8598 * Make a filesystem-specific control call:
8599 */
8600 /* ARGSUSED */
8601 static int
8602 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
8603 {
8604 int error=0;
8605 boolean_t is64bit;
8606 u_int size;
8607 #define STK_PARAMS 128
8608 char stkbuf[STK_PARAMS];
8609 caddr_t data, memp;
8610 vnode_t vp = *arg_vp;
8611
8612 size = IOCPARM_LEN(cmd);
8613 if (size > IOCPARM_MAX) return (EINVAL);
8614
8615 is64bit = proc_is64bit(p);
8616
8617 memp = NULL;
8618 if (size > sizeof (stkbuf)) {
8619 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
8620 data = memp;
8621 } else {
8622 data = &stkbuf[0];
8623 };
8624
8625 if (cmd & IOC_IN) {
8626 if (size) {
8627 error = copyin(udata, data, size);
8628 if (error) goto FSCtl_Exit;
8629 } else {
8630 if (is64bit) {
8631 *(user_addr_t *)data = udata;
8632 }
8633 else {
8634 *(uint32_t *)data = (uint32_t)udata;
8635 }
8636 };
8637 } else if ((cmd & IOC_OUT) && size) {
8638 /*
8639 * Zero the buffer so the user always
8640 * gets back something deterministic.
8641 */
8642 bzero(data, size);
8643 } else if (cmd & IOC_VOID) {
8644 if (is64bit) {
8645 *(user_addr_t *)data = udata;
8646 }
8647 else {
8648 *(uint32_t *)data = (uint32_t)udata;
8649 }
8650 }
8651
8652 /* Check to see if it's a generic command */
8653 if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) {
8654 mount_t mp = vp->v_mount;
8655 int arg = *(uint32_t*)data;
8656
8657 /* record vid of vp so we can drop it below. */
8658 uint32_t vvid = vp->v_id;
8659
8660 /*
8661 * Then grab mount_iterref so that we can release the vnode.
8662 * Without this, a thread may call vnode_iterate_prepare then
8663 * get into a deadlock because we've never released the root vp
8664 */
8665 error = mount_iterref (mp, 0);
8666 if (error) {
8667 goto FSCtl_Exit;
8668 }
8669 vnode_put(vp);
8670
8671 /* issue the sync for this volume */
8672 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
8673
8674 /*
8675 * Then release the mount_iterref once we're done syncing; it's not
8676 * needed for the VNOP_IOCTL below
8677 */
8678 mount_iterdrop(mp);
8679
8680 if (arg & FSCTL_SYNC_FULLSYNC) {
8681 /* re-obtain vnode iocount on the root vp, if possible */
8682 error = vnode_getwithvid (vp, vvid);
8683 if (error == 0) {
8684 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
8685 vnode_put (vp);
8686 }
8687 }
8688 /* mark the argument VP as having been released */
8689 *arg_vp = NULL;
8690
8691 } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
8692 user_addr_t ext_strings;
8693 uint32_t num_entries;
8694 uint32_t max_width;
8695
8696 if ( (is64bit && size != sizeof(user64_package_ext_info))
8697 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
8698
8699 // either you're 64-bit and passed a 64-bit struct or
8700 // you're 32-bit and passed a 32-bit struct. otherwise
8701 // it's not ok.
8702 error = EINVAL;
8703 goto FSCtl_Exit;
8704 }
8705
8706 if (is64bit) {
8707 ext_strings = ((user64_package_ext_info *)data)->strings;
8708 num_entries = ((user64_package_ext_info *)data)->num_entries;
8709 max_width = ((user64_package_ext_info *)data)->max_width;
8710 } else {
8711 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
8712 num_entries = ((user32_package_ext_info *)data)->num_entries;
8713 max_width = ((user32_package_ext_info *)data)->max_width;
8714 }
8715
8716 error = set_package_extensions_table(ext_strings, num_entries, max_width);
8717
8718
8719 }
8720
8721 /* namespace handlers */
8722 else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GET) {
8723 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
8724 }
8725
8726 /* Snapshot handlers */
8727 else if (IOCBASECMD(cmd) == FSCTL_OLD_SNAPSHOT_HANDLER_GET) {
8728 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
8729 } else if (IOCBASECMD(cmd) == FSCTL_SNAPSHOT_HANDLER_GET_EXT) {
8730 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
8731 }
8732
8733 /* Tracked File Handlers */
8734 else if (IOCBASECMD(cmd) == FSCTL_TRACKED_HANDLER_GET) {
8735 error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data);
8736 }
8737 else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GETDATA) {
8738 error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data);
8739 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UPDATE) {
8740 uint32_t token, val;
8741 int i;
8742
8743 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8744 goto FSCtl_Exit;
8745 }
8746
8747 if (!nspace_is_special_process(p)) {
8748 error = EINVAL;
8749 goto FSCtl_Exit;
8750 }
8751
8752 token = ((uint32_t *)data)[0];
8753 val = ((uint32_t *)data)[1];
8754
8755 lck_mtx_lock(&nspace_handler_lock);
8756
8757 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8758 if (nspace_items[i].token == token) {
8759 break;
8760 }
8761 }
8762
8763 if (i >= MAX_NSPACE_ITEMS) {
8764 error = ENOENT;
8765 } else {
8766 //
8767 // if this bit is set, when resolve_nspace_item() times out
8768 // it will loop and go back to sleep.
8769 //
8770 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
8771 }
8772
8773 lck_mtx_unlock(&nspace_handler_lock);
8774
8775 if (error) {
8776 printf("nspace-handler-update: did not find token %u\n", token);
8777 }
8778
8779 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UNBLOCK) {
8780 uint32_t token, val;
8781 int i;
8782
8783 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8784 goto FSCtl_Exit;
8785 }
8786
8787 if (!nspace_is_special_process(p)) {
8788 error = EINVAL;
8789 goto FSCtl_Exit;
8790 }
8791
8792 token = ((uint32_t *)data)[0];
8793 val = ((uint32_t *)data)[1];
8794
8795 lck_mtx_lock(&nspace_handler_lock);
8796
8797 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8798 if (nspace_items[i].token == token) {
8799 break;
8800 }
8801 }
8802
8803 if (i >= MAX_NSPACE_ITEMS) {
8804 printf("nspace-handler-unblock: did not find token %u\n", token);
8805 error = ENOENT;
8806 } else {
8807 if (val == 0 && nspace_items[i].vp) {
8808 vnode_lock_spin(nspace_items[i].vp);
8809 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8810 vnode_unlock(nspace_items[i].vp);
8811 }
8812
8813 nspace_items[i].vp = NULL;
8814 nspace_items[i].arg = NULL;
8815 nspace_items[i].op = 0;
8816 nspace_items[i].vid = 0;
8817 nspace_items[i].flags = NSPACE_ITEM_DONE;
8818 nspace_items[i].token = 0;
8819
8820 wakeup((caddr_t)&(nspace_items[i].vp));
8821 }
8822
8823 lck_mtx_unlock(&nspace_handler_lock);
8824
8825 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_CANCEL) {
8826 uint32_t token, val;
8827 int i;
8828
8829 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8830 goto FSCtl_Exit;
8831 }
8832
8833 if (!nspace_is_special_process(p)) {
8834 error = EINVAL;
8835 goto FSCtl_Exit;
8836 }
8837
8838 token = ((uint32_t *)data)[0];
8839 val = ((uint32_t *)data)[1];
8840
8841 lck_mtx_lock(&nspace_handler_lock);
8842
8843 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8844 if (nspace_items[i].token == token) {
8845 break;
8846 }
8847 }
8848
8849 if (i >= MAX_NSPACE_ITEMS) {
8850 printf("nspace-handler-cancel: did not find token %u\n", token);
8851 error = ENOENT;
8852 } else {
8853 if (nspace_items[i].vp) {
8854 vnode_lock_spin(nspace_items[i].vp);
8855 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8856 vnode_unlock(nspace_items[i].vp);
8857 }
8858
8859 nspace_items[i].vp = NULL;
8860 nspace_items[i].arg = NULL;
8861 nspace_items[i].vid = 0;
8862 nspace_items[i].token = val;
8863 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
8864 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
8865
8866 wakeup((caddr_t)&(nspace_items[i].vp));
8867 }
8868
8869 lck_mtx_unlock(&nspace_handler_lock);
8870 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME) {
8871 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8872 goto FSCtl_Exit;
8873 }
8874
8875 // we explicitly do not do the namespace_handler_proc check here
8876
8877 lck_mtx_lock(&nspace_handler_lock);
8878 snapshot_timestamp = ((uint32_t *)data)[0];
8879 wakeup(&nspace_item_idx);
8880 lck_mtx_unlock(&nspace_handler_lock);
8881 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
8882
8883 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS) {
8884 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8885 goto FSCtl_Exit;
8886 }
8887
8888 lck_mtx_lock(&nspace_handler_lock);
8889 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
8890 lck_mtx_unlock(&nspace_handler_lock);
8891 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
8892 nspace_allow_virtual_devs ? "" : " NOT");
8893 error = 0;
8894
8895 } else if (IOCBASECMD(cmd) == FSCTL_SET_FSTYPENAME_OVERRIDE) {
8896 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8897 goto FSCtl_Exit;
8898 }
8899 if (vp->v_mount) {
8900 mount_lock(vp->v_mount);
8901 if (data[0] != 0) {
8902 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
8903 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
8904 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
8905 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
8906 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
8907 }
8908 } else {
8909 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
8910 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
8911 }
8912 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
8913 vp->v_mount->fstypename_override[0] = '\0';
8914 }
8915 mount_unlock(vp->v_mount);
8916 }
8917 } else {
8918 /* Invoke the filesystem-specific code */
8919 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
8920 }
8921
8922
8923 /*
8924 * Copy any data to user, size was
8925 * already set and checked above.
8926 */
8927 if (error == 0 && (cmd & IOC_OUT) && size)
8928 error = copyout(data, udata, size);
8929
8930 FSCtl_Exit:
8931 if (memp) kfree(memp, size);
8932
8933 return error;
8934 }
8935
8936 /* ARGSUSED */
8937 int
8938 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
8939 {
8940 int error;
8941 struct nameidata nd;
8942 u_long nameiflags;
8943 vnode_t vp = NULL;
8944 vfs_context_t ctx = vfs_context_current();
8945
8946 AUDIT_ARG(cmd, uap->cmd);
8947 AUDIT_ARG(value32, uap->options);
8948 /* Get the vnode for the file we are getting info on: */
8949 nameiflags = 0;
8950 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8951 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
8952 UIO_USERSPACE, uap->path, ctx);
8953 if ((error = namei(&nd))) goto done;
8954 vp = nd.ni_vp;
8955 nameidone(&nd);
8956
8957 #if CONFIG_MACF
8958 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
8959 if (error) {
8960 goto done;
8961 }
8962 #endif
8963
8964 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
8965
8966 done:
8967 if (vp)
8968 vnode_put(vp);
8969 return error;
8970 }
8971 /* ARGSUSED */
8972 int
8973 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
8974 {
8975 int error;
8976 vnode_t vp = NULL;
8977 vfs_context_t ctx = vfs_context_current();
8978 int fd = -1;
8979
8980 AUDIT_ARG(fd, uap->fd);
8981 AUDIT_ARG(cmd, uap->cmd);
8982 AUDIT_ARG(value32, uap->options);
8983
8984 /* Get the vnode for the file we are getting info on: */
8985 if ((error = file_vnode(uap->fd, &vp)))
8986 goto done;
8987 fd = uap->fd;
8988 if ((error = vnode_getwithref(vp))) {
8989 goto done;
8990 }
8991
8992 #if CONFIG_MACF
8993 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
8994 if (error) {
8995 goto done;
8996 }
8997 #endif
8998
8999 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9000
9001 done:
9002 if (fd != -1)
9003 file_drop(fd);
9004
9005 if (vp)
9006 vnode_put(vp);
9007 return error;
9008 }
9009 /* end of fsctl system call */
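
/*
 * A minimal userland sketch of driving the FSCTL_SYNC_VOLUME path handled in
 * fsctl_internal() above.  It assumes the fsctl() wrapper plus the
 * FSIOC_SYNC_VOLUME request and FSCTL_SYNC_WAIT flag from <sys/fsctl.h>;
 * check that header for the exact names on a given release.
 */
#if 0	/* illustrative only -- userland code, not part of the kernel */
#include <sys/fsctl.h>
#include <stdint.h>
#include <stdio.h>

static int
sync_one_volume(const char *path)
{
	uint32_t arg = FSCTL_SYNC_WAIT;	/* wait for the writes to complete */

	/* IOC_IN request: the kernel copies 'arg' in before dispatching. */
	if (fsctl(path, FSIOC_SYNC_VOLUME, &arg, 0) == -1) {
		perror("fsctl(FSIOC_SYNC_VOLUME)");
		return -1;
	}
	return 0;
}
#endif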
9010
9011 /*
9012 * An in-kernel sync for power management to call.
9013 */
9014 __private_extern__ int
9015 sync_internal(void)
9016 {
9017 int error;
9018
9019 struct sync_args data;
9020
9021 int retval[2];
9022
9023
9024 error = sync(current_proc(), &data, &retval[0]);
9025
9026
9027 return (error);
9028 } /* end of sync_internal call */
9029
9030
9031 /*
9032 * Retrieve the data of an extended attribute.
9033 */
9034 int
9035 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
9036 {
9037 vnode_t vp;
9038 struct nameidata nd;
9039 char attrname[XATTR_MAXNAMELEN+1];
9040 vfs_context_t ctx = vfs_context_current();
9041 uio_t auio = NULL;
9042 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9043 size_t attrsize = 0;
9044 size_t namelen;
9045 u_int32_t nameiflags;
9046 int error;
9047 char uio_buf[ UIO_SIZEOF(1) ];
9048
9049 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9050 return (EINVAL);
9051
9052 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9053 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
9054 if ((error = namei(&nd))) {
9055 return (error);
9056 }
9057 vp = nd.ni_vp;
9058 nameidone(&nd);
9059
9060 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9061 goto out;
9062 }
9063 if (xattr_protected(attrname)) {
9064 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
9065 error = EPERM;
9066 goto out;
9067 }
9068 }
9069 /*
9070 * the specific check for 0xffffffff is a hack to preserve
9071 * binary compatibility in K64 with applications that discovered
9072 * that passing in a buf pointer and a size of -1 resulted in
9073 * just the size of the indicated extended attribute being returned.
9074 * this isn't part of the documented behavior, but because of the
9075 * original implementation's check for "uap->size > 0", this behavior
9076 * was allowed. In K32 that check turned into a signed comparison
9077 * even though uap->size is unsigned... in K64, we blow by that
9078 * check because uap->size is unsigned and doesn't get sign smeared
9079 * in the munger for a 32 bit user app. we also need to add a
9080 * check to limit the maximum size of the buffer being passed in...
9081 * unfortunately, the underlying filesystems seem to just malloc
9082 * the requested size even if the actual extended attribute is tiny.
9083 * because that malloc is for kernel wired memory, we have to put a
9084 * sane limit on it.
9085 *
9086 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9087 * U64 running on K64 will yield -1 (64 bits wide)
9088 * U32/U64 running on K32 will yield -1 (32 bits wide)
9089 */
9090 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
9091 goto no_uio;
9092
9093 if (uap->value) {
9094 if (uap->size > (size_t)XATTR_MAXSIZE)
9095 uap->size = XATTR_MAXSIZE;
9096
9097 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9098 &uio_buf[0], sizeof(uio_buf));
9099 uio_addiov(auio, uap->value, uap->size);
9100 }
9101 no_uio:
9102 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
9103 out:
9104 vnode_put(vp);
9105
9106 if (auio) {
9107 *retval = uap->size - uio_resid(auio);
9108 } else {
9109 *retval = (user_ssize_t)attrsize;
9110 }
9111
9112 return (error);
9113 }
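
/*
 * A userland sketch of the size-probe-then-fetch pattern supported above:
 * calling getxattr() with a NULL value buffer takes the auio == NULL path and
 * returns only the attribute's size.  Assumes the wrapper from <sys/xattr.h>.
 */
#if 0	/* illustrative only -- userland code, not part of the kernel */
#include <sys/types.h>
#include <sys/xattr.h>
#include <stdlib.h>

static void *
read_xattr(const char *path, const char *name, size_t *lenp)
{
	void *buf;
	ssize_t len;

	/* First call: no buffer, just ask how big the attribute is. */
	len = getxattr(path, name, NULL, 0, 0, XATTR_NOFOLLOW);
	if (len < 0)
		return NULL;

	if ((buf = malloc(len ? (size_t)len : 1)) == NULL)
		return NULL;

	/* Second call: fetch the data into the buffer we just sized. */
	len = getxattr(path, name, buf, (size_t)len, 0, XATTR_NOFOLLOW);
	if (len < 0) {
		free(buf);
		return NULL;
	}
	*lenp = (size_t)len;
	return buf;
}
#endif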
9114
9115 /*
9116 * Retrieve the data of an extended attribute.
9117 */
9118 int
9119 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
9120 {
9121 vnode_t vp;
9122 char attrname[XATTR_MAXNAMELEN+1];
9123 uio_t auio = NULL;
9124 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9125 size_t attrsize = 0;
9126 size_t namelen;
9127 int error;
9128 char uio_buf[ UIO_SIZEOF(1) ];
9129
9130 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9131 return (EINVAL);
9132
9133 if ( (error = file_vnode(uap->fd, &vp)) ) {
9134 return (error);
9135 }
9136 if ( (error = vnode_getwithref(vp)) ) {
9137 file_drop(uap->fd);
9138 return(error);
9139 }
9140 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9141 goto out;
9142 }
9143 if (xattr_protected(attrname)) {
9144 error = EPERM;
9145 goto out;
9146 }
9147 if (uap->value && uap->size > 0) {
9148 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9149 &uio_buf[0], sizeof(uio_buf));
9150 uio_addiov(auio, uap->value, uap->size);
9151 }
9152
9153 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
9154 out:
9155 (void)vnode_put(vp);
9156 file_drop(uap->fd);
9157
9158 if (auio) {
9159 *retval = uap->size - uio_resid(auio);
9160 } else {
9161 *retval = (user_ssize_t)attrsize;
9162 }
9163 return (error);
9164 }
9165
9166 /*
9167 * Set the data of an extended attribute.
9168 */
9169 int
9170 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
9171 {
9172 vnode_t vp;
9173 struct nameidata nd;
9174 char attrname[XATTR_MAXNAMELEN+1];
9175 vfs_context_t ctx = vfs_context_current();
9176 uio_t auio = NULL;
9177 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9178 size_t namelen;
9179 u_int32_t nameiflags;
9180 int error;
9181 char uio_buf[ UIO_SIZEOF(1) ];
9182
9183 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9184 return (EINVAL);
9185
9186 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9187 if (error == EPERM) {
9188 /* if the string won't fit in attrname, copyinstr emits EPERM */
9189 return (ENAMETOOLONG);
9190 }
9191 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9192 return error;
9193 }
9194 if (xattr_protected(attrname))
9195 return(EPERM);
9196 if (uap->size != 0 && uap->value == 0) {
9197 return (EINVAL);
9198 }
9199
9200 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9201 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
9202 if ((error = namei(&nd))) {
9203 return (error);
9204 }
9205 vp = nd.ni_vp;
9206 nameidone(&nd);
9207
9208 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9209 &uio_buf[0], sizeof(uio_buf));
9210 uio_addiov(auio, uap->value, uap->size);
9211
9212 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
9213 #if CONFIG_FSE
9214 if (error == 0) {
9215 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9216 FSE_ARG_VNODE, vp,
9217 FSE_ARG_DONE);
9218 }
9219 #endif
9220 vnode_put(vp);
9221 *retval = 0;
9222 return (error);
9223 }
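
/*
 * The matching userland call, for completeness -- a sketch assuming the
 * setxattr() wrapper from <sys/xattr.h>.  The attribute name used here is
 * made up for the example.  On success the kernel also posts an
 * FSE_XATTR_MODIFIED fsevent (see the CONFIG_FSE block above).
 */
#if 0	/* illustrative only -- userland code, not part of the kernel */
#include <sys/xattr.h>
#include <string.h>

static int
tag_file(const char *path)
{
	const char *val = "1";

	/* Passing XATTR_CREATE instead of 0 would fail if the attribute exists. */
	return setxattr(path, "com.example.tagged", val, strlen(val), 0, 0);
}
#endif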
9224
9225 /*
9226 * Set the data of an extended attribute.
9227 */
9228 int
9229 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
9230 {
9231 vnode_t vp;
9232 char attrname[XATTR_MAXNAMELEN+1];
9233 uio_t auio = NULL;
9234 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9235 size_t namelen;
9236 int error;
9237 char uio_buf[ UIO_SIZEOF(1) ];
9238 #if CONFIG_FSE
9239 vfs_context_t ctx = vfs_context_current();
9240 #endif
9241
9242 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9243 return (EINVAL);
9244
9245 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9246 return (error);
9247 }
9248 if (xattr_protected(attrname))
9249 return(EPERM);
9250 if (uap->size != 0 && uap->value == 0) {
9251 return (EINVAL);
9252 }
9253 if ( (error = file_vnode(uap->fd, &vp)) ) {
9254 return (error);
9255 }
9256 if ( (error = vnode_getwithref(vp)) ) {
9257 file_drop(uap->fd);
9258 return(error);
9259 }
9260 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9261 &uio_buf[0], sizeof(uio_buf));
9262 uio_addiov(auio, uap->value, uap->size);
9263
9264 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9265 #if CONFIG_FSE
9266 if (error == 0) {
9267 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9268 FSE_ARG_VNODE, vp,
9269 FSE_ARG_DONE);
9270 }
9271 #endif
9272 vnode_put(vp);
9273 file_drop(uap->fd);
9274 *retval = 0;
9275 return (error);
9276 }
9277
9278 /*
9279 * Remove an extended attribute.
9280 * XXX Code duplication here.
9281 */
9282 int
9283 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
9284 {
9285 vnode_t vp;
9286 struct nameidata nd;
9287 char attrname[XATTR_MAXNAMELEN+1];
9288 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9289 vfs_context_t ctx = vfs_context_current();
9290 size_t namelen;
9291 u_int32_t nameiflags;
9292 int error;
9293
9294 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9295 return (EINVAL);
9296
9297 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9298 if (error != 0) {
9299 return (error);
9300 }
9301 if (xattr_protected(attrname))
9302 return(EPERM);
9303 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9304 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
9305 if ((error = namei(&nd))) {
9306 return (error);
9307 }
9308 vp = nd.ni_vp;
9309 nameidone(&nd);
9310
9311 error = vn_removexattr(vp, attrname, uap->options, ctx);
9312 #if CONFIG_FSE
9313 if (error == 0) {
9314 add_fsevent(FSE_XATTR_REMOVED, ctx,
9315 FSE_ARG_VNODE, vp,
9316 FSE_ARG_DONE);
9317 }
9318 #endif
9319 vnode_put(vp);
9320 *retval = 0;
9321 return (error);
9322 }
9323
9324 /*
9325 * Remove an extended attribute.
9326 * XXX Code duplication here.
9327 */
9328 int
9329 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
9330 {
9331 vnode_t vp;
9332 char attrname[XATTR_MAXNAMELEN+1];
9333 size_t namelen;
9334 int error;
9335 #if CONFIG_FSE
9336 vfs_context_t ctx = vfs_context_current();
9337 #endif
9338
9339 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9340 return (EINVAL);
9341
9342 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9343 if (error != 0) {
9344 return (error);
9345 }
9346 if (xattr_protected(attrname))
9347 return(EPERM);
9348 if ( (error = file_vnode(uap->fd, &vp)) ) {
9349 return (error);
9350 }
9351 if ( (error = vnode_getwithref(vp)) ) {
9352 file_drop(uap->fd);
9353 return(error);
9354 }
9355
9356 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
9357 #if CONFIG_FSE
9358 if (error == 0) {
9359 add_fsevent(FSE_XATTR_REMOVED, ctx,
9360 FSE_ARG_VNODE, vp,
9361 FSE_ARG_DONE);
9362 }
9363 #endif
9364 vnode_put(vp);
9365 file_drop(uap->fd);
9366 *retval = 0;
9367 return (error);
9368 }
9369
9370 /*
9371 * Retrieve the list of extended attribute names.
9372 * XXX Code duplication here.
9373 */
9374 int
9375 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
9376 {
9377 vnode_t vp;
9378 struct nameidata nd;
9379 vfs_context_t ctx = vfs_context_current();
9380 uio_t auio = NULL;
9381 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9382 size_t attrsize = 0;
9383 u_int32_t nameiflags;
9384 int error;
9385 char uio_buf[ UIO_SIZEOF(1) ];
9386
9387 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9388 return (EINVAL);
9389
9390 nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
9391 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
9392 if ((error = namei(&nd))) {
9393 return (error);
9394 }
9395 vp = nd.ni_vp;
9396 nameidone(&nd);
9397 if (uap->namebuf != 0 && uap->bufsize > 0) {
9398 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
9399 &uio_buf[0], sizeof(uio_buf));
9400 uio_addiov(auio, uap->namebuf, uap->bufsize);
9401 }
9402
9403 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
9404
9405 vnode_put(vp);
9406 if (auio) {
9407 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
9408 } else {
9409 *retval = (user_ssize_t)attrsize;
9410 }
9411 return (error);
9412 }
9413
9414 /*
9415 * Retrieve the list of extended attribute names.
9416 * XXX Code duplication here.
9417 */
9418 int
9419 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
9420 {
9421 vnode_t vp;
9422 uio_t auio = NULL;
9423 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9424 size_t attrsize = 0;
9425 int error;
9426 char uio_buf[ UIO_SIZEOF(1) ];
9427
9428 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9429 return (EINVAL);
9430
9431 if ( (error = file_vnode(uap->fd, &vp)) ) {
9432 return (error);
9433 }
9434 if ( (error = vnode_getwithref(vp)) ) {
9435 file_drop(uap->fd);
9436 return(error);
9437 }
9438 if (uap->namebuf != 0 && uap->bufsize > 0) {
9439 auio = uio_createwithbuffer(1, 0, spacetype,
9440 UIO_READ, &uio_buf[0], sizeof(uio_buf));
9441 uio_addiov(auio, uap->namebuf, uap->bufsize);
9442 }
9443
9444 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
9445
9446 vnode_put(vp);
9447 file_drop(uap->fd);
9448 if (auio) {
9449 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
9450 } else {
9451 *retval = (user_ssize_t)attrsize;
9452 }
9453 return (error);
9454 }
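
/*
 * A userland sketch of consuming the list produced above: vn_listxattr()
 * fills the buffer with consecutive NUL-terminated names.  Assumes the
 * listxattr() wrapper from <sys/xattr.h>.
 */
#if 0	/* illustrative only -- userland code, not part of the kernel */
#include <sys/types.h>
#include <sys/xattr.h>
#include <stdio.h>
#include <string.h>

static void
print_xattr_names(const char *path)
{
	char names[4096];
	ssize_t len, off;

	len = listxattr(path, names, sizeof(names), XATTR_NOFOLLOW);
	for (off = 0; len > 0 && off < len; off += strlen(&names[off]) + 1) {
		printf("%s\n", &names[off]);
	}
}
#endif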
9455
9456 /*
9457 * Obtain the full pathname of a file system object by id.
9458 *
9459 * This is a private SPI used by the File Manager.
9460 */
9461 __private_extern__
9462 int
9463 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
9464 {
9465 vnode_t vp;
9466 struct mount *mp = NULL;
9467 vfs_context_t ctx = vfs_context_current();
9468 fsid_t fsid;
9469 char *realpath;
9470 int bpflags;
9471 int length;
9472 int error;
9473
9474 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
9475 return (error);
9476 }
9477 AUDIT_ARG(value32, fsid.val[0]);
9478 AUDIT_ARG(value64, uap->objid);
9479 /* Restrict output buffer size for now. */
9480 if (uap->bufsize > PAGE_SIZE) {
9481 return (EINVAL);
9482 }
9483 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
9484 if (realpath == NULL) {
9485 return (ENOMEM);
9486 }
9487 /* Find the target mountpoint. */
9488 if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) {
9489 error = ENOTSUP; /* unexpected failure */
9490 goto out;
9491 }
9492 unionget:
9493 /* Find the target vnode. */
9494 if (uap->objid == 2) {
9495 error = VFS_ROOT(mp, &vp, ctx);
9496 } else {
9497 error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx);
9498 }
9499
9500 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
9501 /*
9502 * If the fileid isn't found and we're in a union
9503 * mount volume, then see if the fileid is in the
9504 * mounted-on volume.
9505 */
9506 struct mount *tmp = mp;
9507 mp = vnode_mount(tmp->mnt_vnodecovered);
9508 vfs_unbusy(tmp);
9509 if (vfs_busy(mp, LK_NOWAIT) == 0)
9510 goto unionget;
9511 } else
9512 vfs_unbusy(mp);
9513
9514 if (error) {
9515 goto out;
9516 }
9517 #if CONFIG_MACF
9518 error = mac_vnode_check_fsgetpath(ctx, vp);
9519 if (error) {
9520 vnode_put(vp);
9521 goto out;
9522 }
9523 #endif
9524 /* Obtain the absolute path to this vnode. */
9525 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
9526 bpflags |= BUILDPATH_CHECK_MOVED;
9527 error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
9528 vnode_put(vp);
9529 if (error) {
9530 goto out;
9531 }
9532 AUDIT_ARG(text, realpath);
9533
9534 if (kdebug_enable) {
9535 long dbg_parms[NUMPARMS];
9536 int dbg_namelen;
9537
9538 dbg_namelen = (int)sizeof(dbg_parms);
9539
9540 if (length < dbg_namelen) {
9541 memcpy((char *)dbg_parms, realpath, length);
9542 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
9543
9544 dbg_namelen = length;
9545 } else
9546 memcpy((char *)dbg_parms, realpath + (length - dbg_namelen), dbg_namelen);
9547
9548 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
9549 }
9550 error = copyout((caddr_t)realpath, uap->buf, length);
9551
9552 *retval = (user_ssize_t)length; /* may be superseded by error */
9553 out:
9554 if (realpath) {
9555 FREE(realpath, M_TEMP);
9556 }
9557 return (error);
9558 }
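
/*
 * A userland sketch of this SPI.  The argument order mirrors the
 * fsgetpath_args fields used above (buf, bufsize, fsid, objid); because the
 * call is private here, the wrapper prototype below is an assumption (later
 * SDKs expose a public fsgetpath() of this shape).
 */
#if 0	/* illustrative only -- userland code, not part of the kernel */
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <stdint.h>

ssize_t fsgetpath(char *buf, size_t bufsize, fsid_t *fsid, uint64_t objid);

static ssize_t
path_for(const char *anypath, char *buf, size_t bufsize)
{
	struct statfs sfs;
	struct stat st;

	/* f_fsid names the volume; st_ino names the object on that volume. */
	if (statfs(anypath, &sfs) == -1 || stat(anypath, &st) == -1)
		return -1;

	return fsgetpath(buf, bufsize, &sfs.f_fsid, (uint64_t)st.st_ino);
}
#endif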
9559
9560 /*
9561 * Common routine to handle various flavors of statfs data heading out
9562 * to user space.
9563 *
9564 * Returns: 0 Success
9565 * EFAULT
9566 */
9567 static int
9568 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
9569 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
9570 boolean_t partial_copy)
9571 {
9572 int error;
9573 int my_size, copy_size;
9574
9575 if (is_64_bit) {
9576 struct user64_statfs sfs;
9577 my_size = copy_size = sizeof(sfs);
9578 bzero(&sfs, my_size);
9579 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9580 sfs.f_type = mp->mnt_vtable->vfc_typenum;
9581 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
9582 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
9583 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
9584 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
9585 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
9586 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
9587 sfs.f_files = (user64_long_t)sfsp->f_files;
9588 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
9589 sfs.f_fsid = sfsp->f_fsid;
9590 sfs.f_owner = sfsp->f_owner;
9591 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
9592 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
9593 } else {
9594 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
9595 }
9596 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
9597 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
9598
9599 if (partial_copy) {
9600 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
9601 }
9602 error = copyout((caddr_t)&sfs, bufp, copy_size);
9603 }
9604 else {
9605 struct user32_statfs sfs;
9606
9607 my_size = copy_size = sizeof(sfs);
9608 bzero(&sfs, my_size);
9609
9610 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9611 sfs.f_type = mp->mnt_vtable->vfc_typenum;
9612 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
9613
9614 /*
9615 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
9616 * have to fudge the numbers here in that case. We inflate the blocksize in order
9617 * to reflect the filesystem size as best we can.
9618 */
9619 if ((sfsp->f_blocks > INT_MAX)
9620 /* Hack for 4061702. I think the real fix is for Carbon to
9621 * look for some volume capability and not depend on hidden
9622 * semantics agreed between a FS and Carbon.
9623 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
9624 * for Carbon to set bNoVolumeSizes volume attribute.
9625 * Without this the webdavfs files cannot be copied onto
9626 * disk as they look huge. This change should not affect
9627 * XSAN, as it should not be setting these to -1.
9628 */
9629 && (sfsp->f_blocks != 0xffffffffffffffffULL)
9630 && (sfsp->f_bfree != 0xffffffffffffffffULL)
9631 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
9632 int shift;
9633
9634 /*
9635 * Work out how far we have to shift the block count down to make it fit.
9636 * Note that it's possible to have to shift so far that the resulting
9637 * blocksize would be unreportably large. At that point, we will clip
9638 * any values that don't fit.
9639 *
9640 * For safety's sake, we also ensure that f_iosize is never reported as
9641 * being smaller than f_bsize.
9642 */
9643 for (shift = 0; shift < 32; shift++) {
9644 if ((sfsp->f_blocks >> shift) <= INT_MAX)
9645 break;
9646 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
9647 break;
9648 }
9649 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
9650 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
9651 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
9652 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
9653 #undef __SHIFT_OR_CLIP
9654 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
9655 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
9656 } else {
9657 /* filesystem is small enough to be reported honestly */
9658 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
9659 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
9660 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
9661 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
9662 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
9663 }
9664 sfs.f_files = (user32_long_t)sfsp->f_files;
9665 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
9666 sfs.f_fsid = sfsp->f_fsid;
9667 sfs.f_owner = sfsp->f_owner;
9668 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
9669 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
9670 } else {
9671 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
9672 }
9673 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
9674 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
9675
9676 if (partial_copy) {
9677 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
9678 }
9679 error = copyout((caddr_t)&sfs, bufp, copy_size);
9680 }
9681
9682 if (sizep != NULL) {
9683 *sizep = my_size;
9684 }
9685 return(error);
9686 }
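
/*
 * A standalone worked example of the scaling done in the 32-bit branch above
 * (not used by the kernel): with f_blocks = 3 * 2^31 and f_bsize = 4096 the
 * loop settles on shift = 2, so the reported blocksize becomes 16384 and the
 * block count becomes 3 * 2^29, which fits in a signed 32-bit field.
 */
#if 0	/* illustrative only */
static void
scale_block_counts(uint64_t f_blocks, uint64_t f_bsize,
    int32_t *blocks_out, int32_t *bsize_out)
{
	int shift;

	for (shift = 0; shift < 32; shift++) {
		if ((f_blocks >> shift) <= INT_MAX)
			break;
		if ((f_bsize << (shift + 1)) > INT_MAX)
			break;
	}
	/* Clip anything that still will not fit after shifting. */
	*blocks_out = (int32_t)(((f_blocks >> shift) > INT_MAX) ? INT_MAX
	    : (f_blocks >> shift));
	*bsize_out = (int32_t)(f_bsize << shift);
}
#endif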
9687
9688 /*
9689 * copy stat structure into user_stat structure.
9690 */
9691 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
9692 {
9693 bzero(usbp, sizeof(*usbp));
9694
9695 usbp->st_dev = sbp->st_dev;
9696 usbp->st_ino = sbp->st_ino;
9697 usbp->st_mode = sbp->st_mode;
9698 usbp->st_nlink = sbp->st_nlink;
9699 usbp->st_uid = sbp->st_uid;
9700 usbp->st_gid = sbp->st_gid;
9701 usbp->st_rdev = sbp->st_rdev;
9702 #ifndef _POSIX_C_SOURCE
9703 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9704 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9705 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9706 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9707 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9708 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9709 #else
9710 usbp->st_atime = sbp->st_atime;
9711 usbp->st_atimensec = sbp->st_atimensec;
9712 usbp->st_mtime = sbp->st_mtime;
9713 usbp->st_mtimensec = sbp->st_mtimensec;
9714 usbp->st_ctime = sbp->st_ctime;
9715 usbp->st_ctimensec = sbp->st_ctimensec;
9716 #endif
9717 usbp->st_size = sbp->st_size;
9718 usbp->st_blocks = sbp->st_blocks;
9719 usbp->st_blksize = sbp->st_blksize;
9720 usbp->st_flags = sbp->st_flags;
9721 usbp->st_gen = sbp->st_gen;
9722 usbp->st_lspare = sbp->st_lspare;
9723 usbp->st_qspare[0] = sbp->st_qspare[0];
9724 usbp->st_qspare[1] = sbp->st_qspare[1];
9725 }
9726
9727 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
9728 {
9729 bzero(usbp, sizeof(*usbp));
9730
9731 usbp->st_dev = sbp->st_dev;
9732 usbp->st_ino = sbp->st_ino;
9733 usbp->st_mode = sbp->st_mode;
9734 usbp->st_nlink = sbp->st_nlink;
9735 usbp->st_uid = sbp->st_uid;
9736 usbp->st_gid = sbp->st_gid;
9737 usbp->st_rdev = sbp->st_rdev;
9738 #ifndef _POSIX_C_SOURCE
9739 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9740 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9741 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9742 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9743 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9744 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9745 #else
9746 usbp->st_atime = sbp->st_atime;
9747 usbp->st_atimensec = sbp->st_atimensec;
9748 usbp->st_mtime = sbp->st_mtime;
9749 usbp->st_mtimensec = sbp->st_mtimensec;
9750 usbp->st_ctime = sbp->st_ctime;
9751 usbp->st_ctimensec = sbp->st_ctimensec;
9752 #endif
9753 usbp->st_size = sbp->st_size;
9754 usbp->st_blocks = sbp->st_blocks;
9755 usbp->st_blksize = sbp->st_blksize;
9756 usbp->st_flags = sbp->st_flags;
9757 usbp->st_gen = sbp->st_gen;
9758 usbp->st_lspare = sbp->st_lspare;
9759 usbp->st_qspare[0] = sbp->st_qspare[0];
9760 usbp->st_qspare[1] = sbp->st_qspare[1];
9761 }
9762
9763 /*
9764 * copy stat64 structure into user_stat64 structure.
9765 */
9766 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
9767 {
9768 bzero(usbp, sizeof(*usbp));
9769
9770 usbp->st_dev = sbp->st_dev;
9771 usbp->st_ino = sbp->st_ino;
9772 usbp->st_mode = sbp->st_mode;
9773 usbp->st_nlink = sbp->st_nlink;
9774 usbp->st_uid = sbp->st_uid;
9775 usbp->st_gid = sbp->st_gid;
9776 usbp->st_rdev = sbp->st_rdev;
9777 #ifndef _POSIX_C_SOURCE
9778 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9779 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9780 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9781 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9782 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9783 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9784 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
9785 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
9786 #else
9787 usbp->st_atime = sbp->st_atime;
9788 usbp->st_atimensec = sbp->st_atimensec;
9789 usbp->st_mtime = sbp->st_mtime;
9790 usbp->st_mtimensec = sbp->st_mtimensec;
9791 usbp->st_ctime = sbp->st_ctime;
9792 usbp->st_ctimensec = sbp->st_ctimensec;
9793 usbp->st_birthtime = sbp->st_birthtime;
9794 usbp->st_birthtimensec = sbp->st_birthtimensec;
9795 #endif
9796 usbp->st_size = sbp->st_size;
9797 usbp->st_blocks = sbp->st_blocks;
9798 usbp->st_blksize = sbp->st_blksize;
9799 usbp->st_flags = sbp->st_flags;
9800 usbp->st_gen = sbp->st_gen;
9801 usbp->st_lspare = sbp->st_lspare;
9802 usbp->st_qspare[0] = sbp->st_qspare[0];
9803 usbp->st_qspare[1] = sbp->st_qspare[1];
9804 }
9805
9806 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
9807 {
9808 bzero(usbp, sizeof(*usbp));
9809
9810 usbp->st_dev = sbp->st_dev;
9811 usbp->st_ino = sbp->st_ino;
9812 usbp->st_mode = sbp->st_mode;
9813 usbp->st_nlink = sbp->st_nlink;
9814 usbp->st_uid = sbp->st_uid;
9815 usbp->st_gid = sbp->st_gid;
9816 usbp->st_rdev = sbp->st_rdev;
9817 #ifndef _POSIX_C_SOURCE
9818 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9819 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9820 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9821 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9822 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9823 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9824 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
9825 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
9826 #else
9827 usbp->st_atime = sbp->st_atime;
9828 usbp->st_atimensec = sbp->st_atimensec;
9829 usbp->st_mtime = sbp->st_mtime;
9830 usbp->st_mtimensec = sbp->st_mtimensec;
9831 usbp->st_ctime = sbp->st_ctime;
9832 usbp->st_ctimensec = sbp->st_ctimensec;
9833 usbp->st_birthtime = sbp->st_birthtime;
9834 usbp->st_birthtimensec = sbp->st_birthtimensec;
9835 #endif
9836 usbp->st_size = sbp->st_size;
9837 usbp->st_blocks = sbp->st_blocks;
9838 usbp->st_blksize = sbp->st_blksize;
9839 usbp->st_flags = sbp->st_flags;
9840 usbp->st_gen = sbp->st_gen;
9841 usbp->st_lspare = sbp->st_lspare;
9842 usbp->st_qspare[0] = sbp->st_qspare[0];
9843 usbp->st_qspare[1] = sbp->st_qspare[1];
9844 }
9845
9846 /*
9847 * Purge buffer cache for simulating cold starts
9848 */
9849 static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
9850 {
9851 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
9852
9853 return VNODE_RETURNED;
9854 }
9855
9856 static int vfs_purge_callback(mount_t mp, __unused void * arg)
9857 {
9858 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
9859
9860 return VFS_RETURNED;
9861 }
9862
9863 int
9864 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
9865 {
9866 if (!kauth_cred_issuser(kauth_cred_get()))
9867 return EPERM;
9868
9869 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
9870
9871 return 0;
9872 }
9873