]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_syscalls.c
e2b135a7bfc2f9d567bbae06f35ee652c6ceff46
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
1 /*
2 * Copyright (c) 1995-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <machine/cons.h>
103 #include <machine/limits.h>
104 #include <miscfs/specfs/specdev.h>
105
106 #include <security/audit/audit.h>
107 #include <bsm/audit_kevents.h>
108
109 #include <mach/mach_types.h>
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/task.h>
113
114 #include <vm/vm_pageout.h>
115
116 #include <libkern/OSAtomic.h>
117 #include <pexpert/pexpert.h>
118
119 #if CONFIG_MACF
120 #include <security/mac.h>
121 #include <security/mac_framework.h>
122 #endif
123
124 #if CONFIG_FSE
125 #define GET_PATH(x) \
126 (x) = get_pathbuff();
127 #define RELEASE_PATH(x) \
128 release_pathbuff(x);
129 #else
130 #define GET_PATH(x) \
131 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
132 #define RELEASE_PATH(x) \
133 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
134 #endif /* CONFIG_FSE */
135
136 /* struct for checkdirs iteration */
/* struct for checkdirs iteration */
struct cdirargs {
	vnode_t olddp;	/* old current/root directory vnode being replaced */
	vnode_t newdp;	/* new vnode to point cdir/rdir at instead */
};
141 /* callback for checkdirs iteration */
142 static int checkdirs_callback(proc_t p, void * arg);
143
144 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
145 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
146 void enablequotas(struct mount *mp, vfs_context_t ctx);
147 static int getfsstat_callback(mount_t mp, void * arg);
148 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
149 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
150 static int sync_callback(mount_t, void *);
151 static void sync_thread(void *, __unused wait_result_t);
152 static int sync_async(int);
153 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
154 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
155 boolean_t partial_copy);
156 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
157 user_addr_t bufp);
158 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
159 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
160 struct componentname *cnp, user_addr_t fsmountargs,
161 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
162 vfs_context_t ctx);
163 void vfs_notify_mount(vnode_t pdvp);
164
165 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
166
167 struct fd_vn_data * fg_vn_data_alloc(void);
168
169 /*
170 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
171 * Concurrent lookups (or lookups by ids) on hard links can cause the
172 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
173 * does) to return ENOENT as the path cannot be returned from the name cache
174 * alone. We have no option but to retry and hope to get one namei->reverse path
175 * generation done without an intervening lookup, lookup by id on the hard link
176 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
177 * which currently are the MAC hooks for rename, unlink and rmdir.
178 */
179 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
180
181 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
182
183 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
184
185 #ifdef CONFIG_IMGSRC_ACCESS
186 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
187 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
188 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
189 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
190 static void mount_end_update(mount_t mp);
191 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
192 #endif /* CONFIG_IMGSRC_ACCESS */
193
194 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
195
196 __private_extern__
197 int sync_internal(void);
198
199 __private_extern__
200 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
201
202 extern lck_grp_t *fd_vn_lck_grp;
203 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
204 extern lck_attr_t *fd_vn_lck_attr;
205
206 /*
207 * incremented each time a mount or unmount operation occurs
208 * used to invalidate the cached value of the rootvp in the
209 * mount structure utilized by cache_lookup_path
210 */
211 uint32_t mount_generation = 0;
212
213 /* counts number of mount and unmount operations */
214 unsigned int vfs_nummntops=0;
215
216 extern const struct fileops vnops;
217 #if CONFIG_APPLEDOUBLE
218 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
219 #endif /* CONFIG_APPLEDOUBLE */
220
221 typedef uint32_t vfs_rename_flags_t;
222 #if CONFIG_SECLUDED_RENAME
223 enum {
224 VFS_SECLUDE_RENAME = 0x00000001
225 };
226 #endif
227
228 /*
229 * Virtual File System System Calls
230 */
231
232 #if NFSCLIENT || DEVFS
233 /*
234 * Private in-kernel mounting spi (NFS only, not exported)
235 */
236 __private_extern__
237 boolean_t
238 vfs_iskernelmount(mount_t mp)
239 {
240 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
241 }
242
243 __private_extern__
244 int
245 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
246 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
247 {
248 struct nameidata nd;
249 boolean_t did_namei;
250 int error;
251
252 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
253 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
254
255 /*
256 * Get the vnode to be covered if it's not supplied
257 */
258 if (vp == NULLVP) {
259 error = namei(&nd);
260 if (error)
261 return (error);
262 vp = nd.ni_vp;
263 pvp = nd.ni_dvp;
264 did_namei = TRUE;
265 } else {
266 char *pnbuf = CAST_DOWN(char *, path);
267
268 nd.ni_cnd.cn_pnbuf = pnbuf;
269 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
270 did_namei = FALSE;
271 }
272
273 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
274 syscall_flags, kern_flags, NULL, TRUE, ctx);
275
276 if (did_namei) {
277 vnode_put(vp);
278 vnode_put(pvp);
279 nameidone(&nd);
280 }
281
282 return (error);
283 }
284 #endif /* NFSCLIENT || DEVFS */
285
286 /*
287 * Mount a file system.
288 */
289 /* ARGSUSED */
290 int
291 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
292 {
293 struct __mac_mount_args muap;
294
295 muap.type = uap->type;
296 muap.path = uap->path;
297 muap.flags = uap->flags;
298 muap.data = uap->data;
299 muap.mac_p = USER_ADDR_NULL;
300 return (__mac_mount(p, &muap, retval));
301 }
302
/*
 * vfs_notify_mount:
 *   Broadcast that a mount has occurred.  Signals VQ_MOUNT to vfs event
 *   listeners and posts a NOTE_WRITE knote on the parent of the covered
 *   vnode so kqueue watchers of that directory see the change.
 */
void
vfs_notify_mount(vnode_t pdvp)
{
	/* system-wide mount event (no specific mount point attached) */
	vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
	/* wake watchers of the directory the new mount appeared under */
	lock_vnode_and_post(pdvp, NOTE_WRITE);
}
309
310 /*
311 * __mac_mount:
312 * Mount a file system taking into account MAC label behavior.
313 * See mount(2) man page for more information
314 *
315 * Parameters: p Process requesting the mount
316 * uap User argument descriptor (see below)
317 * retval (ignored)
318 *
319 * Indirect: uap->type Filesystem type
320 * uap->path Path to mount
321 * uap->data Mount arguments
322 * uap->mac_p MAC info
323 * uap->flags Mount flags
324 *
325 *
326 * Returns: 0 Success
327 * !0 Not success
328 */
329 boolean_t root_fs_upgrade_try = FALSE;
330
331 int
332 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
333 {
334 vnode_t pvp = NULL;
335 vnode_t vp = NULL;
336 int need_nameidone = 0;
337 vfs_context_t ctx = vfs_context_current();
338 char fstypename[MFSNAMELEN];
339 struct nameidata nd;
340 size_t dummy=0;
341 char *labelstr = NULL;
342 int flags = uap->flags;
343 int error;
344 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
345 boolean_t is_64bit = IS_64BIT_PROCESS(p);
346 #else
347 #pragma unused(p)
348 #endif
349 /*
350 * Get the fs type name from user space
351 */
352 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
353 if (error)
354 return (error);
355
356 /*
357 * Get the vnode to be covered
358 */
359 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
360 UIO_USERSPACE, uap->path, ctx);
361 error = namei(&nd);
362 if (error) {
363 goto out;
364 }
365 need_nameidone = 1;
366 vp = nd.ni_vp;
367 pvp = nd.ni_dvp;
368
369 #ifdef CONFIG_IMGSRC_ACCESS
370 /* Mounting image source cannot be batched with other operations */
371 if (flags == MNT_IMGSRC_BY_INDEX) {
372 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
373 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
374 goto out;
375 }
376 #endif /* CONFIG_IMGSRC_ACCESS */
377
378 #if CONFIG_MACF
379 /*
380 * Get the label string (if any) from user space
381 */
382 if (uap->mac_p != USER_ADDR_NULL) {
383 struct user_mac mac;
384 size_t ulen = 0;
385
386 if (is_64bit) {
387 struct user64_mac mac64;
388 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
389 mac.m_buflen = mac64.m_buflen;
390 mac.m_string = mac64.m_string;
391 } else {
392 struct user32_mac mac32;
393 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
394 mac.m_buflen = mac32.m_buflen;
395 mac.m_string = mac32.m_string;
396 }
397 if (error)
398 goto out;
399 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
400 (mac.m_buflen < 2)) {
401 error = EINVAL;
402 goto out;
403 }
404 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
405 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
406 if (error) {
407 goto out;
408 }
409 AUDIT_ARG(mac_string, labelstr);
410 }
411 #endif /* CONFIG_MACF */
412
413 AUDIT_ARG(fflags, flags);
414
415 if ((vp->v_flag & VROOT) &&
416 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
417 if (!(flags & MNT_UNION)) {
418 flags |= MNT_UPDATE;
419 }
420 else {
421 /*
422 * For a union mount on '/', treat it as fresh
423 * mount instead of update.
424 * Otherwise, union mouting on '/' used to panic the
425 * system before, since mnt_vnodecovered was found to
426 * be NULL for '/' which is required for unionlookup
427 * after it gets ENOENT on union mount.
428 */
429 flags = (flags & ~(MNT_UPDATE));
430 }
431
432 #ifdef SECURE_KERNEL
433 if ((flags & MNT_RDONLY) == 0) {
434 /* Release kernels are not allowed to mount "/" as rw */
435 error = EPERM;
436 goto out;
437 }
438 #endif
439 /*
440 * See 7392553 for more details on why this check exists.
441 * Suffice to say: If this check is ON and something tries
442 * to mount the rootFS RW, we'll turn off the codesign
443 * bitmap optimization.
444 */
445 #if CHECK_CS_VALIDATION_BITMAP
446 if ((flags & MNT_RDONLY) == 0 ) {
447 root_fs_upgrade_try = TRUE;
448 }
449 #endif
450 }
451
452 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
453 labelstr, FALSE, ctx);
454
455 out:
456
457 #if CONFIG_MACF
458 if (labelstr)
459 FREE(labelstr, M_MACTEMP);
460 #endif /* CONFIG_MACF */
461
462 if (vp) {
463 vnode_put(vp);
464 }
465 if (pvp) {
466 vnode_put(pvp);
467 }
468 if (need_nameidone) {
469 nameidone(&nd);
470 }
471
472 return (error);
473 }
474
475 /*
476 * common mount implementation (final stage of mounting)
477
478 * Arguments:
479 * fstypename file system type (ie it's vfs name)
480 * pvp parent of covered vnode
481 * vp covered vnode
482 * cnp component name (ie path) of covered vnode
483 * flags generic mount flags
484 * fsmountargs file system specific data
485 * labelstr optional MAC label
486 * kernelmount TRUE for mounts initiated from inside the kernel
487 * ctx caller's context
488 */
489 static int
490 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
491 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
492 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
493 {
494 #if !CONFIG_MACF
495 #pragma unused(labelstr)
496 #endif
497 struct vnode *devvp = NULLVP;
498 struct vnode *device_vnode = NULLVP;
499 #if CONFIG_MACF
500 struct vnode *rvp;
501 #endif
502 struct mount *mp;
503 struct vfstable *vfsp = (struct vfstable *)0;
504 struct proc *p = vfs_context_proc(ctx);
505 int error, flag = 0;
506 user_addr_t devpath = USER_ADDR_NULL;
507 int ronly = 0;
508 int mntalloc = 0;
509 boolean_t vfsp_ref = FALSE;
510 boolean_t is_rwlock_locked = FALSE;
511 boolean_t did_rele = FALSE;
512 boolean_t have_usecount = FALSE;
513
514 /*
515 * Process an update for an existing mount
516 */
517 if (flags & MNT_UPDATE) {
518 if ((vp->v_flag & VROOT) == 0) {
519 error = EINVAL;
520 goto out1;
521 }
522 mp = vp->v_mount;
523
524 /* unmount in progress return error */
525 mount_lock_spin(mp);
526 if (mp->mnt_lflag & MNT_LUNMOUNT) {
527 mount_unlock(mp);
528 error = EBUSY;
529 goto out1;
530 }
531 mount_unlock(mp);
532 lck_rw_lock_exclusive(&mp->mnt_rwlock);
533 is_rwlock_locked = TRUE;
534 /*
535 * We only allow the filesystem to be reloaded if it
536 * is currently mounted read-only.
537 */
538 if ((flags & MNT_RELOAD) &&
539 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
540 error = ENOTSUP;
541 goto out1;
542 }
543
544 /*
545 * If content protection is enabled, update mounts are not
546 * allowed to turn it off.
547 */
548 if ((mp->mnt_flag & MNT_CPROTECT) &&
549 ((flags & MNT_CPROTECT) == 0)) {
550 error = EINVAL;
551 goto out1;
552 }
553
554 #ifdef CONFIG_IMGSRC_ACCESS
555 /* Can't downgrade the backer of the root FS */
556 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
557 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
558 error = ENOTSUP;
559 goto out1;
560 }
561 #endif /* CONFIG_IMGSRC_ACCESS */
562
563 /*
564 * Only root, or the user that did the original mount is
565 * permitted to update it.
566 */
567 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
568 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
569 goto out1;
570 }
571 #if CONFIG_MACF
572 error = mac_mount_check_remount(ctx, mp);
573 if (error != 0) {
574 goto out1;
575 }
576 #endif
577 /*
578 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
579 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
580 */
581 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
582 flags |= MNT_NOSUID | MNT_NODEV;
583 if (mp->mnt_flag & MNT_NOEXEC)
584 flags |= MNT_NOEXEC;
585 }
586 flag = mp->mnt_flag;
587
588
589
590 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
591
592 vfsp = mp->mnt_vtable;
593 goto update;
594 }
595 /*
596 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
597 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
598 */
599 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
600 flags |= MNT_NOSUID | MNT_NODEV;
601 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
602 flags |= MNT_NOEXEC;
603 }
604
605 /* XXXAUDIT: Should we capture the type on the error path as well? */
606 AUDIT_ARG(text, fstypename);
607 mount_list_lock();
608 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
609 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
610 vfsp->vfc_refcount++;
611 vfsp_ref = TRUE;
612 break;
613 }
614 mount_list_unlock();
615 if (vfsp == NULL) {
616 error = ENODEV;
617 goto out1;
618 }
619
620 /*
621 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
622 */
623 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
624 error = EINVAL; /* unsupported request */
625 goto out1;
626 }
627
628 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
629 if (error != 0) {
630 goto out1;
631 }
632
633 /*
634 * Allocate and initialize the filesystem (mount_t)
635 */
636 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
637 M_MOUNT, M_WAITOK);
638 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
639 mntalloc = 1;
640
641 /* Initialize the default IO constraints */
642 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
643 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
644 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
645 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
646 mp->mnt_devblocksize = DEV_BSIZE;
647 mp->mnt_alignmentmask = PAGE_MASK;
648 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
649 mp->mnt_ioscale = 1;
650 mp->mnt_ioflags = 0;
651 mp->mnt_realrootvp = NULLVP;
652 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
653
654 TAILQ_INIT(&mp->mnt_vnodelist);
655 TAILQ_INIT(&mp->mnt_workerqueue);
656 TAILQ_INIT(&mp->mnt_newvnodes);
657 mount_lock_init(mp);
658 lck_rw_lock_exclusive(&mp->mnt_rwlock);
659 is_rwlock_locked = TRUE;
660 mp->mnt_op = vfsp->vfc_vfsops;
661 mp->mnt_vtable = vfsp;
662 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
663 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
664 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
665 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
666 mp->mnt_vnodecovered = vp;
667 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
668 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
669 mp->mnt_devbsdunit = 0;
670
671 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
672 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
673
674 #if NFSCLIENT || DEVFS
675 if (kernelmount)
676 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
677 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
678 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
679 #endif /* NFSCLIENT || DEVFS */
680
681 update:
682 /*
683 * Set the mount level flags.
684 */
685 if (flags & MNT_RDONLY)
686 mp->mnt_flag |= MNT_RDONLY;
687 else if (mp->mnt_flag & MNT_RDONLY) {
688 // disallow read/write upgrades of file systems that
689 // had the TYPENAME_OVERRIDE feature set.
690 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
691 error = EPERM;
692 goto out1;
693 }
694 mp->mnt_kern_flag |= MNTK_WANTRDWR;
695 }
696 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
697 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
698 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
699 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
700 MNT_QUARANTINE | MNT_CPROTECT);
701 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
702 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
703 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
704 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
705 MNT_QUARANTINE | MNT_CPROTECT);
706
707 #if CONFIG_MACF
708 if (flags & MNT_MULTILABEL) {
709 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
710 error = EINVAL;
711 goto out1;
712 }
713 mp->mnt_flag |= MNT_MULTILABEL;
714 }
715 #endif
716 /*
717 * Process device path for local file systems if requested
718 */
719 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
720 if (vfs_context_is64bit(ctx)) {
721 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
722 goto out1;
723 fsmountargs += sizeof(devpath);
724 } else {
725 user32_addr_t tmp;
726 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
727 goto out1;
728 /* munge into LP64 addr */
729 devpath = CAST_USER_ADDR_T(tmp);
730 fsmountargs += sizeof(tmp);
731 }
732
733 /* Lookup device and authorize access to it */
734 if ((devpath)) {
735 struct nameidata nd;
736
737 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
738 if ( (error = namei(&nd)) )
739 goto out1;
740
741 strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
742 devvp = nd.ni_vp;
743
744 nameidone(&nd);
745
746 if (devvp->v_type != VBLK) {
747 error = ENOTBLK;
748 goto out2;
749 }
750 if (major(devvp->v_rdev) >= nblkdev) {
751 error = ENXIO;
752 goto out2;
753 }
754 /*
755 * If mount by non-root, then verify that user has necessary
756 * permissions on the device.
757 */
758 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
759 mode_t accessmode = KAUTH_VNODE_READ_DATA;
760
761 if ((mp->mnt_flag & MNT_RDONLY) == 0)
762 accessmode |= KAUTH_VNODE_WRITE_DATA;
763 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
764 goto out2;
765 }
766 }
767 /* On first mount, preflight and open device */
768 if (devpath && ((flags & MNT_UPDATE) == 0)) {
769 if ( (error = vnode_ref(devvp)) )
770 goto out2;
771 /*
772 * Disallow multiple mounts of the same device.
773 * Disallow mounting of a device that is currently in use
774 * (except for root, which might share swap device for miniroot).
775 * Flush out any old buffers remaining from a previous use.
776 */
777 if ( (error = vfs_mountedon(devvp)) )
778 goto out3;
779
780 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
781 error = EBUSY;
782 goto out3;
783 }
784 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
785 error = ENOTBLK;
786 goto out3;
787 }
788 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
789 goto out3;
790
791 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
792 #if CONFIG_MACF
793 error = mac_vnode_check_open(ctx,
794 devvp,
795 ronly ? FREAD : FREAD|FWRITE);
796 if (error)
797 goto out3;
798 #endif /* MAC */
799 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
800 goto out3;
801
802 mp->mnt_devvp = devvp;
803 device_vnode = devvp;
804
805 } else if ((mp->mnt_flag & MNT_RDONLY) &&
806 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
807 (device_vnode = mp->mnt_devvp)) {
808 dev_t dev;
809 int maj;
810 /*
811 * If upgrade to read-write by non-root, then verify
812 * that user has necessary permissions on the device.
813 */
814 vnode_getalways(device_vnode);
815
816 if (suser(vfs_context_ucred(ctx), NULL) &&
817 (error = vnode_authorize(device_vnode, NULL,
818 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
819 ctx)) != 0) {
820 vnode_put(device_vnode);
821 goto out2;
822 }
823
824 /* Tell the device that we're upgrading */
825 dev = (dev_t)device_vnode->v_rdev;
826 maj = major(dev);
827
828 if ((u_int)maj >= (u_int)nblkdev)
829 panic("Volume mounted on a device with invalid major number.");
830
831 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
832 vnode_put(device_vnode);
833 device_vnode = NULLVP;
834 if (error != 0) {
835 goto out2;
836 }
837 }
838 }
839 #if CONFIG_MACF
840 if ((flags & MNT_UPDATE) == 0) {
841 mac_mount_label_init(mp);
842 mac_mount_label_associate(ctx, mp);
843 }
844 if (labelstr) {
845 if ((flags & MNT_UPDATE) != 0) {
846 error = mac_mount_check_label_update(ctx, mp);
847 if (error != 0)
848 goto out3;
849 }
850 }
851 #endif
852 /*
853 * Mount the filesystem.
854 */
855 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
856
857 if (flags & MNT_UPDATE) {
858 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
859 mp->mnt_flag &= ~MNT_RDONLY;
860 mp->mnt_flag &=~
861 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
862 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
863 if (error)
864 mp->mnt_flag = flag; /* restore flag value */
865 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
866 lck_rw_done(&mp->mnt_rwlock);
867 is_rwlock_locked = FALSE;
868 if (!error)
869 enablequotas(mp, ctx);
870 goto exit;
871 }
872
873 /*
874 * Put the new filesystem on the mount list after root.
875 */
876 if (error == 0) {
877 struct vfs_attr vfsattr;
878 #if CONFIG_MACF
879 if (vfs_flags(mp) & MNT_MULTILABEL) {
880 error = VFS_ROOT(mp, &rvp, ctx);
881 if (error) {
882 printf("%s() VFS_ROOT returned %d\n", __func__, error);
883 goto out3;
884 }
885 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
886 /*
887 * drop reference provided by VFS_ROOT
888 */
889 vnode_put(rvp);
890
891 if (error)
892 goto out3;
893 }
894 #endif /* MAC */
895
896 vnode_lock_spin(vp);
897 CLR(vp->v_flag, VMOUNT);
898 vp->v_mountedhere = mp;
899 vnode_unlock(vp);
900
901 /*
902 * taking the name_cache_lock exclusively will
903 * insure that everyone is out of the fast path who
904 * might be trying to use a now stale copy of
905 * vp->v_mountedhere->mnt_realrootvp
906 * bumping mount_generation causes the cached values
907 * to be invalidated
908 */
909 name_cache_lock();
910 mount_generation++;
911 name_cache_unlock();
912
913 error = vnode_ref(vp);
914 if (error != 0) {
915 goto out4;
916 }
917
918 have_usecount = TRUE;
919
920 error = checkdirs(vp, ctx);
921 if (error != 0) {
922 /* Unmount the filesystem as cdir/rdirs cannot be updated */
923 goto out4;
924 }
925 /*
926 * there is no cleanup code here so I have made it void
927 * we need to revisit this
928 */
929 (void)VFS_START(mp, 0, ctx);
930
931 if (mount_list_add(mp) != 0) {
932 /*
933 * The system is shutting down trying to umount
934 * everything, so fail with a plausible errno.
935 */
936 error = EBUSY;
937 goto out4;
938 }
939 lck_rw_done(&mp->mnt_rwlock);
940 is_rwlock_locked = FALSE;
941
942 /* Check if this mounted file system supports EAs or named streams. */
943 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
944 VFSATTR_INIT(&vfsattr);
945 VFSATTR_WANTED(&vfsattr, f_capabilities);
946 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
947 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
948 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
949 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
950 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
951 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
952 }
953 #if NAMEDSTREAMS
954 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
955 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
956 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
957 }
958 #endif
959 /* Check if this file system supports path from id lookups. */
960 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
961 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
962 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
963 } else if (mp->mnt_flag & MNT_DOVOLFS) {
964 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
965 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
966 }
967 }
968 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
969 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
970 }
971 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
972 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
973 }
974 /* increment the operations count */
975 OSAddAtomic(1, &vfs_nummntops);
976 enablequotas(mp, ctx);
977
978 if (device_vnode) {
979 device_vnode->v_specflags |= SI_MOUNTEDON;
980
981 /*
982 * cache the IO attributes for the underlying physical media...
983 * an error return indicates the underlying driver doesn't
984 * support all the queries necessary... however, reasonable
985 * defaults will have been set, so no reason to bail or care
986 */
987 vfs_init_io_attributes(device_vnode, mp);
988 }
989
990 /* Now that mount is setup, notify the listeners */
991 vfs_notify_mount(pvp);
992 } else {
993 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
994 if (mp->mnt_vnodelist.tqh_first != NULL) {
995 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
996 mp->mnt_vtable->vfc_name, error);
997 }
998
999 vnode_lock_spin(vp);
1000 CLR(vp->v_flag, VMOUNT);
1001 vnode_unlock(vp);
1002 mount_list_lock();
1003 mp->mnt_vtable->vfc_refcount--;
1004 mount_list_unlock();
1005
1006 if (device_vnode ) {
1007 vnode_rele(device_vnode);
1008 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
1009 }
1010 lck_rw_done(&mp->mnt_rwlock);
1011 is_rwlock_locked = FALSE;
1012
1013 /*
1014 * if we get here, we have a mount structure that needs to be freed,
1015 * but since the coveredvp hasn't yet been updated to point at it,
1016 * no need to worry about other threads holding a crossref on this mp
1017 * so it's ok to just free it
1018 */
1019 mount_lock_destroy(mp);
1020 #if CONFIG_MACF
1021 mac_mount_label_destroy(mp);
1022 #endif
1023 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1024 }
1025 exit:
1026 /*
1027 * drop I/O count on the device vp if there was one
1028 */
1029 if (devpath && devvp)
1030 vnode_put(devvp);
1031
1032 return(error);
1033
1034 /* Error condition exits */
1035 out4:
1036 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1037
1038 /*
1039 * If the mount has been placed on the covered vp,
1040 * it may have been discovered by now, so we have
1041 * to treat this just like an unmount
1042 */
1043 mount_lock_spin(mp);
1044 mp->mnt_lflag |= MNT_LDEAD;
1045 mount_unlock(mp);
1046
1047 if (device_vnode != NULLVP) {
1048 vnode_rele(device_vnode);
1049 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1050 ctx);
1051 did_rele = TRUE;
1052 }
1053
1054 vnode_lock_spin(vp);
1055
1056 mp->mnt_crossref++;
1057 vp->v_mountedhere = (mount_t) 0;
1058
1059 vnode_unlock(vp);
1060
1061 if (have_usecount) {
1062 vnode_rele(vp);
1063 }
1064 out3:
1065 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1066 vnode_rele(devvp);
1067 out2:
1068 if (devpath && devvp)
1069 vnode_put(devvp);
1070 out1:
1071 /* Release mnt_rwlock only when it was taken */
1072 if (is_rwlock_locked == TRUE) {
1073 lck_rw_done(&mp->mnt_rwlock);
1074 }
1075
1076 if (mntalloc) {
1077 if (mp->mnt_crossref)
1078 mount_dropcrossref(mp, vp, 0);
1079 else {
1080 mount_lock_destroy(mp);
1081 #if CONFIG_MACF
1082 mac_mount_label_destroy(mp);
1083 #endif
1084 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1085 }
1086 }
1087 if (vfsp_ref) {
1088 mount_list_lock();
1089 vfsp->vfc_refcount--;
1090 mount_list_unlock();
1091 }
1092
1093 return(error);
1094 }
1095
1096 /*
1097 * Flush in-core data, check for competing mount attempts,
1098 * and set VMOUNT
1099 */
1100 int
1101 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1102 {
1103 #if !CONFIG_MACF
1104 #pragma unused(cnp,fsname)
1105 #endif
1106 struct vnode_attr va;
1107 int error;
1108
1109 if (!skip_auth) {
1110 /*
1111 * If the user is not root, ensure that they own the directory
1112 * onto which we are attempting to mount.
1113 */
1114 VATTR_INIT(&va);
1115 VATTR_WANTED(&va, va_uid);
1116 if ((error = vnode_getattr(vp, &va, ctx)) ||
1117 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1118 (!vfs_context_issuser(ctx)))) {
1119 error = EPERM;
1120 goto out;
1121 }
1122 }
1123
1124 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1125 goto out;
1126
1127 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1128 goto out;
1129
1130 if (vp->v_type != VDIR) {
1131 error = ENOTDIR;
1132 goto out;
1133 }
1134
1135 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1136 error = EBUSY;
1137 goto out;
1138 }
1139
1140 #if CONFIG_MACF
1141 error = mac_mount_check_mount(ctx, vp,
1142 cnp, fsname);
1143 if (error != 0)
1144 goto out;
1145 #endif
1146
1147 vnode_lock_spin(vp);
1148 SET(vp->v_flag, VMOUNT);
1149 vnode_unlock(vp);
1150
1151 out:
1152 return error;
1153 }
1154
1155 #if CONFIG_IMGSRC_ACCESS
1156
1157 #if DEBUG
1158 #define IMGSRC_DEBUG(args...) printf(args)
1159 #else
1160 #define IMGSRC_DEBUG(args...) do { } while(0)
1161 #endif
1162
1163 static int
1164 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1165 {
1166 struct nameidata nd;
1167 vnode_t vp, realdevvp;
1168 mode_t accessmode;
1169 int error;
1170
1171 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1172 if ( (error = namei(&nd)) ) {
1173 IMGSRC_DEBUG("namei() failed with %d\n", error);
1174 return error;
1175 }
1176
1177 vp = nd.ni_vp;
1178
1179 if (!vnode_isblk(vp)) {
1180 IMGSRC_DEBUG("Not block device.\n");
1181 error = ENOTBLK;
1182 goto out;
1183 }
1184
1185 realdevvp = mp->mnt_devvp;
1186 if (realdevvp == NULLVP) {
1187 IMGSRC_DEBUG("No device backs the mount.\n");
1188 error = ENXIO;
1189 goto out;
1190 }
1191
1192 error = vnode_getwithref(realdevvp);
1193 if (error != 0) {
1194 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1195 goto out;
1196 }
1197
1198 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1199 IMGSRC_DEBUG("Wrong dev_t.\n");
1200 error = ENXIO;
1201 goto out1;
1202 }
1203
1204 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1205
1206 /*
1207 * If mount by non-root, then verify that user has necessary
1208 * permissions on the device.
1209 */
1210 if (!vfs_context_issuser(ctx)) {
1211 accessmode = KAUTH_VNODE_READ_DATA;
1212 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1213 accessmode |= KAUTH_VNODE_WRITE_DATA;
1214 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1215 IMGSRC_DEBUG("Access denied.\n");
1216 goto out1;
1217 }
1218 }
1219
1220 *devvpp = vp;
1221
1222 out1:
1223 vnode_put(realdevvp);
1224 out:
1225 nameidone(&nd);
1226 if (error) {
1227 vnode_put(vp);
1228 }
1229
1230 return error;
1231 }
1232
1233 /*
1234 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1235 * and call checkdirs()
1236 */
1237 static int
1238 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1239 {
1240 int error;
1241
1242 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1243
1244 vnode_lock_spin(vp);
1245 CLR(vp->v_flag, VMOUNT);
1246 vp->v_mountedhere = mp;
1247 vnode_unlock(vp);
1248
1249 /*
1250 * taking the name_cache_lock exclusively will
1251 * insure that everyone is out of the fast path who
1252 * might be trying to use a now stale copy of
1253 * vp->v_mountedhere->mnt_realrootvp
1254 * bumping mount_generation causes the cached values
1255 * to be invalidated
1256 */
1257 name_cache_lock();
1258 mount_generation++;
1259 name_cache_unlock();
1260
1261 error = vnode_ref(vp);
1262 if (error != 0) {
1263 goto out;
1264 }
1265
1266 error = checkdirs(vp, ctx);
1267 if (error != 0) {
1268 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1269 vnode_rele(vp);
1270 goto out;
1271 }
1272
1273 out:
1274 if (error != 0) {
1275 mp->mnt_vnodecovered = NULLVP;
1276 }
1277 return error;
1278 }
1279
/*
 * Reverse place_mount_and_checkdirs(): drop the usecount taken on the
 * covered vnode, detach the mount from it, and clear mnt_vnodecovered.
 * Does not restore VMOUNT (the mount attempt is being abandoned).
 */
static void
undo_place_on_covered_vp(mount_t mp, vnode_t vp)
{
	vnode_rele(vp);
	vnode_lock_spin(vp);
	vp->v_mountedhere = (mount_t)NULL;
	vnode_unlock(vp);

	mp->mnt_vnodecovered = NULLVP;
}
1290
/*
 * Begin an update (remount) of 'mp': reject if an unmount is in
 * progress, take mnt_rwlock exclusively, and authorize the update.
 * On success the rwlock is held and must be released with
 * mount_end_update(); on failure the rwlock has been dropped.
 */
static int
mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
{
	int error;

	/* unmount in progress return error */
	mount_lock_spin(mp);
	if (mp->mnt_lflag & MNT_LUNMOUNT) {
		mount_unlock(mp);
		return EBUSY;
	}
	mount_unlock(mp);
	lck_rw_lock_exclusive(&mp->mnt_rwlock);

	/*
	 * We only allow the filesystem to be reloaded if it
	 * is currently mounted read-only.
	 */
	if ((flags & MNT_RELOAD) &&
	    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
		error = ENOTSUP;
		goto out;
	}

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to update it.
	 */
	if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
	    (!vfs_context_issuser(ctx))) {
		error = EPERM;
		goto out;
	}
#if CONFIG_MACF
	error = mac_mount_check_remount(ctx, mp);
	if (error != 0) {
		goto out;
	}
#endif

out:
	if (error) {
		lck_rw_done(&mp->mnt_rwlock);
	}

	return error;
}
1338
/* Release the exclusive mnt_rwlock taken by mount_begin_update(). */
static void
mount_end_update(mount_t mp)
{
	lck_rw_done(&mp->mnt_rwlock);
}
1344
1345 static int
1346 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1347 {
1348 vnode_t vp;
1349
1350 if (height >= MAX_IMAGEBOOT_NESTING) {
1351 return EINVAL;
1352 }
1353
1354 vp = imgsrc_rootvnodes[height];
1355 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1356 *rvpp = vp;
1357 return 0;
1358 } else {
1359 return ENOENT;
1360 }
1361 }
1362
/*
 * Move the imageboot source filesystem (at nesting level 'height', or
 * level 0 for the legacy calling convention) so that it covers 'vp'.
 * Invoked from the mount path when MNT_IMGSRC_BY_INDEX-style arguments
 * are supplied.  Root only; a filesystem may be moved at most once
 * (MNTK_HAS_MOVED).  Returns 0 or an errno; all references and the
 * mount rwlock are released on every path.
 */
static int
relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
		const char *fsname, vfs_context_t ctx,
		boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
{
	int error;
	mount_t mp;
	boolean_t placed = FALSE;
	vnode_t devvp = NULLVP;
	struct vfstable *vfsp;
	user_addr_t devpath;
	char *old_mntonname;
	vnode_t rvp;
	uint32_t height;
	uint32_t flags;

	/* If we didn't imageboot, nothing to move */
	if (imgsrc_rootvnodes[0] == NULLVP) {
		return EINVAL;
	}

	/* Only root can do this */
	if (!vfs_context_issuser(ctx)) {
		return EPERM;
	}

	IMGSRC_DEBUG("looking for root vnode.\n");

	/*
	 * Get root vnode of filesystem we're moving.
	 */
	if (by_index) {
		/* Caller supplied a mnt_imgsrc_args structure (height + flags + devpath). */
		if (is64bit) {
			struct user64_mnt_imgsrc_args mia64;
			error = copyin(fsmountargs, &mia64, sizeof(mia64));
			if (error != 0) {
				IMGSRC_DEBUG("Failed to copy in arguments.\n");
				return error;
			}

			height = mia64.mi_height;
			flags = mia64.mi_flags;
			devpath = mia64.mi_devpath;
		} else {
			struct user32_mnt_imgsrc_args mia32;
			error = copyin(fsmountargs, &mia32, sizeof(mia32));
			if (error != 0) {
				IMGSRC_DEBUG("Failed to copy in arguments.\n");
				return error;
			}

			height = mia32.mi_height;
			flags = mia32.mi_flags;
			devpath = mia32.mi_devpath;
		}
	} else {
		/*
		 * For binary compatibility--assumes one level of nesting.
		 */
		if (is64bit) {
			if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
				return error;
		} else {
			user32_addr_t tmp;
			if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
				return error;

			/* munge into LP64 addr */
			devpath = CAST_USER_ADDR_T(tmp);
		}

		height = 0;
		flags = 0;
	}

	/* No flag bits are currently defined; reject anything nonzero. */
	if (flags != 0) {
		IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
		return EINVAL;
	}

	/* Takes an iocount on rvp on success; dropped at out0/success. */
	error = get_imgsrc_rootvnode(height, &rvp);
	if (error != 0) {
		IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
		return error;
	}

	IMGSRC_DEBUG("got root vnode.\n");

	MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);

	/* Can only move once */
	mp = vnode_mount(rvp);
	if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
		IMGSRC_DEBUG("Already moved.\n");
		error = EBUSY;
		goto out0;
	}

	IMGSRC_DEBUG("Starting updated.\n");

	/* Get exclusive rwlock on mount, authorize update on mp */
	error = mount_begin_update(mp , ctx, 0);
	if (error != 0) {
		IMGSRC_DEBUG("Starting updated failed with %d\n", error);
		goto out0;
	}

	/*
	 * It can only be moved once. Flag is set under the rwlock,
	 * so we're now safe to proceed.
	 */
	if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
		IMGSRC_DEBUG("Already moved [2]\n");
		goto out1;
	}


	IMGSRC_DEBUG("Preparing coveredvp.\n");

	/* Mark covered vnode as mount in progress, authorize placing mount on top */
	error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
	if (error != 0) {
		IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
		goto out1;
	}

	IMGSRC_DEBUG("Covered vp OK.\n");

	/* Sanity check the name caller has provided */
	vfsp = mp->mnt_vtable;
	if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
		IMGSRC_DEBUG("Wrong fs name.\n");
		error = EINVAL;
		goto out2;
	}

	/* Check the device vnode and update mount-from name, for local filesystems */
	if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
		IMGSRC_DEBUG("Local, doing device validation.\n");

		if (devpath != USER_ADDR_NULL) {
			error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
			if (error) {
				IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
				goto out2;
			}

			/* Only the name update was needed; drop the device iocount now. */
			vnode_put(devvp);
		}
	}

	/*
	 * Place mp on top of vnode, ref the vnode, call checkdirs(),
	 * and increment the name cache's mount generation
	 */

	IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
	error = place_mount_and_checkdirs(mp, vp, ctx);
	if (error != 0) {
		goto out2;
	}

	placed = TRUE;

	/* Save old mount-on name so it can be restored if mount_list_add fails. */
	strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
	strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);

	/* Forbid future moves */
	mount_lock(mp);
	mp->mnt_kern_flag |= MNTK_HAS_MOVED;
	mount_unlock(mp);

	/* Finally, add to mount list, completely ready to go */
	if (mount_list_add(mp) != 0) {
		/*
		 * The system is shutting down trying to umount
		 * everything, so fail with a plausible errno.
		 */
		error = EBUSY;
		goto out3;
	}

	mount_end_update(mp);
	vnode_put(rvp);
	FREE(old_mntonname, M_TEMP);

	vfs_notify_mount(pvp);

	return 0;
out3:
	strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);

	mount_lock(mp);
	mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
	mount_unlock(mp);

out2:
	/*
	 * Placing the mp on the vnode clears VMOUNT,
	 * so cleanup is different after that point
	 */
	if (placed) {
		/* Rele the vp, clear VMOUNT and v_mountedhere */
		undo_place_on_covered_vp(mp, vp);
	} else {
		vnode_lock_spin(vp);
		CLR(vp->v_flag, VMOUNT);
		vnode_unlock(vp);
	}
out1:
	mount_end_update(mp);

out0:
	vnode_put(rvp);
	FREE(old_mntonname, M_TEMP);
	return error;
}
1580
1581 #endif /* CONFIG_IMGSRC_ACCESS */
1582
/*
 * Turn on disk quotas for a freshly mounted filesystem if the
 * per-type quota trigger files exist.  Errors are deliberately
 * ignored so quota setup can never fail a mount.  Currently HFS-only.
 */
void
enablequotas(struct mount *mp, vfs_context_t ctx)
{
	struct nameidata qnd;
	int type;
	char qfpath[MAXPATHLEN];
	const char *qfname = QUOTAFILENAME;
	const char *qfopsname = QUOTAOPSNAME;
	const char *qfextension[] = INITQFNAMES;

	/* XXX Should be an MNTK_ flag, instead of strncmp()'s */
	if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
		return;
	}
	/*
	 * Enable filesystem disk quotas if necessary.
	 * We ignore errors as this should not interfere with final mount
	 */
	for (type=0; type < MAXQUOTAS; type++) {
		/* Look for the "ops" trigger file; its absence means quotas stay off. */
		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
		NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
		       CAST_USER_ADDR_T(qfpath), ctx);
		if (namei(&qnd) != 0)
			continue; 	    /* option file to trigger quotas is not present */
		vnode_put(qnd.ni_vp);
		nameidone(&qnd);
		/* The actual quota data file path is passed to the filesystem. */
		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);

		(void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
	}
	return;
}
1615
1616
/*
 * Per-process callback for checkdirs(): if the process's current or
 * root directory is the old covered vnode, repoint it at the new
 * filesystem root.  'arg' is a struct cdirargs carrying olddp/newdp.
 * Always returns PROC_RETURNED so the proc iteration continues.
 */
static int
checkdirs_callback(proc_t p, void * arg)
{
	struct cdirargs * cdrp = (struct cdirargs * )arg;
	vnode_t olddp = cdrp->olddp;
	vnode_t newdp = cdrp->newdp;
	struct filedesc *fdp;
	vnode_t tvp;
	vnode_t fdp_cvp;
	vnode_t fdp_rvp;
	int cdir_changed = 0;
	int rdir_changed = 0;

	/*
	 * XXX Also needs to iterate each thread in the process to see if it
	 * XXX is using a per-thread current working directory, and, if so,
	 * XXX update that as well.
	 */

	proc_fdlock(p);
	fdp = p->p_fd;
	if (fdp == (struct filedesc *)0) {
		proc_fdunlock(p);
		return(PROC_RETURNED);
	}
	/* Snapshot cdir/rdir under the fd lock. */
	fdp_cvp = fdp->fd_cdir;
	fdp_rvp = fdp->fd_rdir;
	proc_fdunlock(p);

	/*
	 * NOTE(review): the fdp->fd_cdir / fd_rdir re-reads below happen
	 * after proc_fdunlock(); they rely on these fields not changing
	 * between the snapshot and the swap — verify this assumption.
	 */
	if (fdp_cvp == olddp) {
		vnode_ref(newdp);
		tvp = fdp->fd_cdir;
		fdp_cvp = newdp;
		cdir_changed = 1;
		vnode_rele(tvp);
	}
	if (fdp_rvp == olddp) {
		vnode_ref(newdp);
		tvp = fdp->fd_rdir;
		fdp_rvp = newdp;
		rdir_changed = 1;
		vnode_rele(tvp);
	}
	/* Publish the new directories back under the fd lock. */
	if (cdir_changed || rdir_changed) {
		proc_fdlock(p);
		fdp->fd_cdir = fdp_cvp;
		fdp->fd_rdir = fdp_rvp;
		proc_fdunlock(p);
	}
	return(PROC_RETURNED);
}
1668
1669
1670
1671 /*
1672 * Scan all active processes to see if any of them have a current
1673 * or root directory onto which the new filesystem has just been
1674 * mounted. If so, replace them with the new mount point.
1675 */
1676 static int
1677 checkdirs(vnode_t olddp, vfs_context_t ctx)
1678 {
1679 vnode_t newdp;
1680 vnode_t tvp;
1681 int err;
1682 struct cdirargs cdr;
1683
1684 if (olddp->v_usecount == 1)
1685 return(0);
1686 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1687
1688 if (err != 0) {
1689 #if DIAGNOSTIC
1690 panic("mount: lost mount: error %d", err);
1691 #endif
1692 return(err);
1693 }
1694
1695 cdr.olddp = olddp;
1696 cdr.newdp = newdp;
1697 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1698 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1699
1700 if (rootvnode == olddp) {
1701 vnode_ref(newdp);
1702 tvp = rootvnode;
1703 rootvnode = newdp;
1704 vnode_rele(tvp);
1705 }
1706
1707 vnode_put(newdp);
1708 return(0);
1709 }
1710
1711 /*
1712 * Unmount a file system.
1713 *
1714 * Note: unmount takes a path to the vnode mounted on as argument,
1715 * not special file (as before).
1716 */
1717 /* ARGSUSED */
1718 int
1719 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1720 {
1721 vnode_t vp;
1722 struct mount *mp;
1723 int error;
1724 struct nameidata nd;
1725 vfs_context_t ctx = vfs_context_current();
1726
1727 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
1728 UIO_USERSPACE, uap->path, ctx);
1729 error = namei(&nd);
1730 if (error)
1731 return (error);
1732 vp = nd.ni_vp;
1733 mp = vp->v_mount;
1734 nameidone(&nd);
1735
1736 #if CONFIG_MACF
1737 error = mac_mount_check_umount(ctx, mp);
1738 if (error != 0) {
1739 vnode_put(vp);
1740 return (error);
1741 }
1742 #endif
1743 /*
1744 * Must be the root of the filesystem
1745 */
1746 if ((vp->v_flag & VROOT) == 0) {
1747 vnode_put(vp);
1748 return (EINVAL);
1749 }
1750 mount_ref(mp, 0);
1751 vnode_put(vp);
1752 /* safedounmount consumes the mount ref */
1753 return (safedounmount(mp, uap->flags, ctx));
1754 }
1755
1756 int
1757 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1758 {
1759 mount_t mp;
1760
1761 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1762 if (mp == (mount_t)0) {
1763 return(ENOENT);
1764 }
1765 mount_ref(mp, 0);
1766 mount_iterdrop(mp);
1767 /* safedounmount consumes the mount ref */
1768 return(safedounmount(mp, flags, ctx));
1769 }
1770
1771
1772 /*
1773 * The mount struct comes with a mount ref which will be consumed.
1774 * Do the actual file system unmount, prevent some common foot shooting.
1775 */
1776 int
1777 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1778 {
1779 int error;
1780 proc_t p = vfs_context_proc(ctx);
1781
1782 /*
1783 * If the file system is not responding and MNT_NOBLOCK
1784 * is set and not a forced unmount then return EBUSY.
1785 */
1786 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1787 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1788 error = EBUSY;
1789 goto out;
1790 }
1791
1792 /*
1793 * Skip authorization if the mount is tagged as permissive and
1794 * this is not a forced-unmount attempt.
1795 */
1796 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1797 /*
1798 * Only root, or the user that did the original mount is
1799 * permitted to unmount this filesystem.
1800 */
1801 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1802 (error = suser(kauth_cred_get(), &p->p_acflag)))
1803 goto out;
1804 }
1805 /*
1806 * Don't allow unmounting the root file system.
1807 */
1808 if (mp->mnt_flag & MNT_ROOTFS) {
1809 error = EBUSY; /* the root is always busy */
1810 goto out;
1811 }
1812
1813 #ifdef CONFIG_IMGSRC_ACCESS
1814 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1815 error = EBUSY;
1816 goto out;
1817 }
1818 #endif /* CONFIG_IMGSRC_ACCESS */
1819
1820 return (dounmount(mp, flags, 1, ctx));
1821
1822 out:
1823 mount_drop(mp, 0);
1824 return(error);
1825 }
1826
1827 /*
1828 * Do the actual file system unmount.
1829 */
1830 int
1831 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1832 {
1833 vnode_t coveredvp = (vnode_t)0;
1834 int error;
1835 int needwakeup = 0;
1836 int forcedunmount = 0;
1837 int lflags = 0;
1838 struct vnode *devvp = NULLVP;
1839 #if CONFIG_TRIGGERS
1840 proc_t p = vfs_context_proc(ctx);
1841 int did_vflush = 0;
1842 int pflags_save = 0;
1843 #endif /* CONFIG_TRIGGERS */
1844
1845 mount_lock(mp);
1846
1847 /*
1848 * If already an unmount in progress just return EBUSY.
1849 * Even a forced unmount cannot override.
1850 */
1851 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1852 if (withref != 0)
1853 mount_drop(mp, 1);
1854 mount_unlock(mp);
1855 return (EBUSY);
1856 }
1857
1858 if (flags & MNT_FORCE) {
1859 forcedunmount = 1;
1860 mp->mnt_lflag |= MNT_LFORCE;
1861 }
1862
1863 #if CONFIG_TRIGGERS
1864 if (flags & MNT_NOBLOCK && p != kernproc)
1865 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1866 #endif
1867
1868 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1869 mp->mnt_lflag |= MNT_LUNMOUNT;
1870 mp->mnt_flag &=~ MNT_ASYNC;
1871 /*
1872 * anyone currently in the fast path that
1873 * trips over the cached rootvp will be
1874 * dumped out and forced into the slow path
1875 * to regenerate a new cached value
1876 */
1877 mp->mnt_realrootvp = NULLVP;
1878 mount_unlock(mp);
1879
1880 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1881 /*
1882 * Force unmount any mounts in this filesystem.
1883 * If any unmounts fail - just leave them dangling.
1884 * Avoids recursion.
1885 */
1886 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
1887 }
1888
1889 /*
1890 * taking the name_cache_lock exclusively will
1891 * insure that everyone is out of the fast path who
1892 * might be trying to use a now stale copy of
1893 * vp->v_mountedhere->mnt_realrootvp
1894 * bumping mount_generation causes the cached values
1895 * to be invalidated
1896 */
1897 name_cache_lock();
1898 mount_generation++;
1899 name_cache_unlock();
1900
1901
1902 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1903 if (withref != 0)
1904 mount_drop(mp, 0);
1905 #if CONFIG_FSE
1906 fsevent_unmount(mp); /* has to come first! */
1907 #endif
1908 error = 0;
1909 if (forcedunmount == 0) {
1910 ubc_umount(mp); /* release cached vnodes */
1911 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1912 error = VFS_SYNC(mp, MNT_WAIT, ctx);
1913 if (error) {
1914 mount_lock(mp);
1915 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1916 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1917 mp->mnt_lflag &= ~MNT_LFORCE;
1918 goto out;
1919 }
1920 }
1921 }
1922
1923 #if CONFIG_TRIGGERS
1924 vfs_nested_trigger_unmounts(mp, flags, ctx);
1925 did_vflush = 1;
1926 #endif
1927 if (forcedunmount)
1928 lflags |= FORCECLOSE;
1929 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1930 if ((forcedunmount == 0) && error) {
1931 mount_lock(mp);
1932 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1933 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1934 mp->mnt_lflag &= ~MNT_LFORCE;
1935 goto out;
1936 }
1937
1938 /* make sure there are no one in the mount iterations or lookup */
1939 mount_iterdrain(mp);
1940
1941 error = VFS_UNMOUNT(mp, flags, ctx);
1942 if (error) {
1943 mount_iterreset(mp);
1944 mount_lock(mp);
1945 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1946 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1947 mp->mnt_lflag &= ~MNT_LFORCE;
1948 goto out;
1949 }
1950
1951 /* increment the operations count */
1952 if (!error)
1953 OSAddAtomic(1, &vfs_nummntops);
1954
1955 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1956 /* hold an io reference and drop the usecount before close */
1957 devvp = mp->mnt_devvp;
1958 vnode_getalways(devvp);
1959 vnode_rele(devvp);
1960 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1961 ctx);
1962 vnode_clearmountedon(devvp);
1963 vnode_put(devvp);
1964 }
1965 lck_rw_done(&mp->mnt_rwlock);
1966 mount_list_remove(mp);
1967 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1968
1969 /* mark the mount point hook in the vp but not drop the ref yet */
1970 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1971 /*
1972 * The covered vnode needs special handling. Trying to get an
1973 * iocount must not block here as this may lead to deadlocks
1974 * if the Filesystem to which the covered vnode belongs is
1975 * undergoing forced unmounts. Since we hold a usecount, the
1976 * vnode cannot be reused (it can, however, still be terminated)
1977 */
1978 vnode_getalways(coveredvp);
1979 vnode_lock_spin(coveredvp);
1980
1981 mp->mnt_crossref++;
1982 coveredvp->v_mountedhere = (struct mount *)0;
1983 CLR(coveredvp->v_flag, VMOUNT);
1984
1985 vnode_unlock(coveredvp);
1986 vnode_put(coveredvp);
1987 }
1988
1989 mount_list_lock();
1990 mp->mnt_vtable->vfc_refcount--;
1991 mount_list_unlock();
1992
1993 cache_purgevfs(mp); /* remove cache entries for this file sys */
1994 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1995 mount_lock(mp);
1996 mp->mnt_lflag |= MNT_LDEAD;
1997
1998 if (mp->mnt_lflag & MNT_LWAIT) {
1999 /*
2000 * do the wakeup here
2001 * in case we block in mount_refdrain
2002 * which will drop the mount lock
2003 * and allow anyone blocked in vfs_busy
2004 * to wakeup and see the LDEAD state
2005 */
2006 mp->mnt_lflag &= ~MNT_LWAIT;
2007 wakeup((caddr_t)mp);
2008 }
2009 mount_refdrain(mp);
2010 out:
2011 if (mp->mnt_lflag & MNT_LWAIT) {
2012 mp->mnt_lflag &= ~MNT_LWAIT;
2013 needwakeup = 1;
2014 }
2015
2016 #if CONFIG_TRIGGERS
2017 if (flags & MNT_NOBLOCK && p != kernproc) {
2018 // Restore P_NOREMOTEHANG bit to its previous value
2019 if ((pflags_save & P_NOREMOTEHANG) == 0)
2020 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2021 }
2022
2023 /*
2024 * Callback and context are set together under the mount lock, and
2025 * never cleared, so we're safe to examine them here, drop the lock,
2026 * and call out.
2027 */
2028 if (mp->mnt_triggercallback != NULL) {
2029 mount_unlock(mp);
2030 if (error == 0) {
2031 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2032 } else if (did_vflush) {
2033 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2034 }
2035 } else {
2036 mount_unlock(mp);
2037 }
2038 #else
2039 mount_unlock(mp);
2040 #endif /* CONFIG_TRIGGERS */
2041
2042 lck_rw_done(&mp->mnt_rwlock);
2043
2044 if (needwakeup)
2045 wakeup((caddr_t)mp);
2046
2047 if (!error) {
2048 if ((coveredvp != NULLVP)) {
2049 vnode_t pvp = NULLVP;
2050
2051 /*
2052 * The covered vnode needs special handling. Trying to
2053 * get an iocount must not block here as this may lead
2054 * to deadlocks if the Filesystem to which the covered
2055 * vnode belongs is undergoing forced unmounts. Since we
2056 * hold a usecount, the vnode cannot be reused
2057 * (it can, however, still be terminated).
2058 */
2059 vnode_getalways(coveredvp);
2060
2061 mount_dropcrossref(mp, coveredvp, 0);
2062 /*
2063 * We'll _try_ to detect if this really needs to be
2064 * done. The coveredvp can only be in termination (or
2065 * terminated) if the coveredvp's mount point is in a
2066 * forced unmount (or has been) since we still hold the
2067 * ref.
2068 */
2069 if (!vnode_isrecycled(coveredvp)) {
2070 pvp = vnode_getparent(coveredvp);
2071 #if CONFIG_TRIGGERS
2072 if (coveredvp->v_resolve) {
2073 vnode_trigger_rearm(coveredvp, ctx);
2074 }
2075 #endif
2076 }
2077
2078 vnode_rele(coveredvp);
2079 vnode_put(coveredvp);
2080 coveredvp = NULLVP;
2081
2082 if (pvp) {
2083 lock_vnode_and_post(pvp, NOTE_WRITE);
2084 vnode_put(pvp);
2085 }
2086 } else if (mp->mnt_flag & MNT_ROOTFS) {
2087 mount_lock_destroy(mp);
2088 #if CONFIG_MACF
2089 mac_mount_label_destroy(mp);
2090 #endif
2091 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2092 } else
2093 panic("dounmount: no coveredvp");
2094 }
2095 return (error);
2096 }
2097
2098 /*
2099 * Unmount any mounts in this filesystem.
2100 */
2101 void
2102 dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2103 {
2104 mount_t smp;
2105 fsid_t *fsids, fsid;
2106 int fsids_sz;
2107 int count = 0, i, m = 0;
2108 vnode_t vp;
2109
2110 mount_list_lock();
2111
2112 // Get an array to hold the submounts fsids.
2113 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2114 count++;
2115 fsids_sz = count * sizeof(fsid_t);
2116 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2117 if (fsids == NULL) {
2118 mount_list_unlock();
2119 goto out;
2120 }
2121 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2122
2123 /*
2124 * Fill the array with submount fsids.
2125 * Since mounts are always added to the tail of the mount list, the
2126 * list is always in mount order.
2127 * For each mount check if the mounted-on vnode belongs to a
2128 * mount that's already added to our array of mounts to be unmounted.
2129 */
2130 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2131 vp = smp->mnt_vnodecovered;
2132 if (vp == NULL)
2133 continue;
2134 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2135 for (i = 0; i <= m; i++) {
2136 if (fsids[i].val[0] == fsid.val[0] &&
2137 fsids[i].val[1] == fsid.val[1]) {
2138 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2139 break;
2140 }
2141 }
2142 }
2143 mount_list_unlock();
2144
2145 // Unmount the submounts in reverse order. Ignore errors.
2146 for (i = m; i > 0; i--) {
2147 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2148 if (smp) {
2149 mount_ref(smp, 0);
2150 mount_iterdrop(smp);
2151 (void) dounmount(smp, flags, 1, ctx);
2152 }
2153 }
2154 out:
2155 if (fsids)
2156 FREE(fsids, M_TEMP);
2157 }
2158
/*
 * Drop one crossref on 'mp' taken via covered vnode 'dp'.  When the
 * last crossref goes away and the mount is no longer attached to the
 * vnode, the mount structure itself is destroyed and freed.  If
 * 'need_put' is set, an iocount on 'dp' is also released.
 */
void
mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
{
	vnode_lock(dp);
	mp->mnt_crossref--;

	if (mp->mnt_crossref < 0)
		panic("mount cross refs -ve");

	if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {

		/* Last reference and detached: tear the mount down. */
		if (need_put)
			vnode_put_locked(dp);
		vnode_unlock(dp);

		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		return;
	}
	if (need_put)
		vnode_put_locked(dp);
	vnode_unlock(dp);
}
2185
2186
2187 /*
2188 * Sync each mounted filesystem.
2189 */
2190 #if DIAGNOSTIC
2191 int syncprt = 0;
2192 #endif
2193
2194 int print_vmpage_stat=0;
2195 int sync_timeout = 60; // Sync time limit (sec)
2196
/*
 * vfs_iterate() callback: sync one writable mount.  A non-NULL 'arg'
 * requests a waiting (MNT_WAIT) sync.  MNT_ASYNC is cleared for the
 * duration of the sync and restored afterwards.
 */
static int
sync_callback(mount_t mp, __unused void *arg)
{
	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		int asyncflag = mp->mnt_flag & MNT_ASYNC;

		mp->mnt_flag &= ~MNT_ASYNC;
		VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
		if (asyncflag)
			mp->mnt_flag |= MNT_ASYNC;
	}

	return (VFS_RETURNED);
}
2211
/*
 * sync(2) system call: schedule a non-waiting sync of every mounted
 * filesystem.  Always returns 0.
 */
/* ARGSUSED */
int
sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
{
	vfs_iterate(LK_NOWAIT, sync_callback, NULL);

	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
	return 0;
}
2228
/*
 * Kernel-thread body spawned by sync_async(): sync all filesystems,
 * then wake the waiter sleeping on the 'timeout' address (if any).
 */
static void
sync_thread(void *arg, __unused wait_result_t wr)
{
	int *timeout = (int *) arg;

	vfs_iterate(LK_NOWAIT, sync_callback, NULL);

	/* Wake sync_async(), which sleeps on this address. */
	if (timeout)
		wakeup((caddr_t) timeout);
	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
}
2247
/*
 * Sync in a separate thread so we can time out if it blocks.
 *
 * Starts sync_thread() and sleeps on &timeout for at most 'timeout'
 * seconds; sync_thread() wakes the same channel when it finishes.
 * Always returns 0, whether or not the sync completed in time.
 */
static int
sync_async(int timeout)
{
	thread_t thd;
	int error;
	struct timespec ts = {timeout, 0};

	lck_mtx_lock(sync_mtx_lck);
	if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
		printf("sync_thread failed\n");
		lck_mtx_unlock(sync_mtx_lck);
		return (0);
	}

	/*
	 * NOTE(review): &timeout is a stack local handed to the helper
	 * thread.  If msleep() times out and this frame returns before
	 * sync_thread() runs its wakeup(), the thread uses a stale
	 * stack address as the wake channel; wakeup() only treats it as
	 * an opaque token, but this lifetime deserves confirmation.
	 */
	error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
	if (error) {
		printf("sync timed out: %d sec\n", timeout);
	}
	thread_deallocate(thd);

	return (0);
}
2273
2274 /*
2275 * An in-kernel sync for power management to call.
2276 */
2277 __private_extern__ int
2278 sync_internal(void)
2279 {
2280 (void) sync_async(sync_timeout);
2281
2282 return 0;
2283 } /* end of sync_internal call */
2284
2285 /*
2286 * Change filesystem quotas.
2287 */
2288 #if QUOTA
int
quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	int error, quota_cmd, quota_status;
	caddr_t datap;
	size_t fnamelen;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	struct dqblk my_dqblk;

	AUDIT_ARG(uid, uap->uid);
	AUDIT_ARG(cmd, uap->cmd);
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	/* Only the mount is needed; drop the vnode right away. */
	mp = nd.ni_vp->v_mount;
	vnode_put(nd.ni_vp);
	nameidone(&nd);

	/* copyin any data we will need for downstream code */
	quota_cmd = uap->cmd >> SUBCMDSHIFT;

	/* 'error' is 0 here (namei succeeded); cases may overwrite it. */
	switch (quota_cmd) {
	case Q_QUOTAON:
		/* uap->arg specifies a file from which to take the quotas */
		fnamelen = MAXPATHLEN;
		datap = kalloc(MAXPATHLEN);
		error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		break;
	case Q_SETQUOTA:
	case Q_SETUSE:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		if (proc_is64bit(p)) {
			/* 64-bit callers pass the wider layout; munge it down. */
			struct user_dqblk my_dqblk64;
			error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
			if (error == 0) {
				munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
			}
		}
		else {
			error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		datap = (caddr_t) &quota_status;
		break;
	default:
		/* Unknown subcommand: hand the filesystem a NULL buffer. */
		datap = NULL;
		break;
	} /* switch */

	if (error == 0) {
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
	}

	/* Per-command cleanup and copyout of results. */
	switch (quota_cmd) {
	case Q_QUOTAON:
		if (datap != NULL)
			kfree(datap, MAXPATHLEN);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure we need to copy out to */
		if (error == 0) {
			if (proc_is64bit(p)) {
				struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
				munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
				error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
			}
			else {
				error = copyout(datap, uap->arg, sizeof (struct dqblk));
			}
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		if (error == 0) {
			error = copyout(datap, uap->arg, sizeof(quota_status));
		}
		break;
	default:
		break;
	} /* switch */

	return (error);
}
2383 #else
int
quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
{
	/* Quota support compiled out: always unsupported. */
	return (EOPNOTSUPP);
}
2389 #endif /* QUOTA */
2390
2391 /*
2392 * Get filesystem statistics.
2393 *
2394 * Returns: 0 Success
2395 * namei:???
2396 * vfs_update_vfsstat:???
2397 * munge_statfs:EFAULT
2398 */
2399 /* ARGSUSED */
2400 int
2401 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2402 {
2403 struct mount *mp;
2404 struct vfsstatfs *sp;
2405 int error;
2406 struct nameidata nd;
2407 vfs_context_t ctx = vfs_context_current();
2408 vnode_t vp;
2409
2410 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2411 UIO_USERSPACE, uap->path, ctx);
2412 error = namei(&nd);
2413 if (error)
2414 return (error);
2415 vp = nd.ni_vp;
2416 mp = vp->v_mount;
2417 sp = &mp->mnt_vfsstat;
2418 nameidone(&nd);
2419
2420 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2421 if (error != 0) {
2422 vnode_put(vp);
2423 return (error);
2424 }
2425
2426 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2427 vnode_put(vp);
2428 return (error);
2429 }
2430
/*
 * Get filesystem statistics for the filesystem containing the open
 * file uap->fd; copy the (possibly refreshed) vfsstat out to uap->buf
 * in the caller's 32/64-bit format.
 */
/* ARGSUSED */
int
fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* file_vnode() takes an fd reference; dropped below/at 'out'. */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		/* Vnode with no mount (e.g. dead): treat as a bad fd. */
		error = EBADF;
		goto out;
	}
	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
2474
2475 /*
2476 * Common routine to handle copying of statfs64 data to user space
2477 */
2478 static int
2479 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2480 {
2481 int error;
2482 struct statfs64 sfs;
2483
2484 bzero(&sfs, sizeof(sfs));
2485
2486 sfs.f_bsize = sfsp->f_bsize;
2487 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2488 sfs.f_blocks = sfsp->f_blocks;
2489 sfs.f_bfree = sfsp->f_bfree;
2490 sfs.f_bavail = sfsp->f_bavail;
2491 sfs.f_files = sfsp->f_files;
2492 sfs.f_ffree = sfsp->f_ffree;
2493 sfs.f_fsid = sfsp->f_fsid;
2494 sfs.f_owner = sfsp->f_owner;
2495 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2496 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2497 sfs.f_fssubtype = sfsp->f_fssubtype;
2498 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2499 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2500 } else {
2501 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2502 }
2503 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2504 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2505
2506 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2507
2508 return(error);
2509 }
2510
2511 /*
2512 * Get file system statistics in 64-bit mode
2513 */
2514 int
2515 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2516 {
2517 struct mount *mp;
2518 struct vfsstatfs *sp;
2519 int error;
2520 struct nameidata nd;
2521 vfs_context_t ctxp = vfs_context_current();
2522 vnode_t vp;
2523
2524 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2525 UIO_USERSPACE, uap->path, ctxp);
2526 error = namei(&nd);
2527 if (error)
2528 return (error);
2529 vp = nd.ni_vp;
2530 mp = vp->v_mount;
2531 sp = &mp->mnt_vfsstat;
2532 nameidone(&nd);
2533
2534 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2535 if (error != 0) {
2536 vnode_put(vp);
2537 return (error);
2538 }
2539
2540 error = statfs64_common(mp, sp, uap->buf);
2541 vnode_put(vp);
2542
2543 return (error);
2544 }
2545
/*
 * Get file system statistics in 64-bit mode: like fstatfs(), but the
 * result is copied out as a struct statfs64.
 */
int
fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
{
	struct vnode *vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* file_vnode() takes an fd reference; dropped below/at 'out'. */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		/* Vnode with no mount (e.g. dead): treat as a bad fd. */
		error = EBADF;
		goto out;
	}
	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = statfs64_common(mp, sp, uap->buf);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
2588
/* Iteration state shared with the getfsstat / __mac_getfsstat callbacks. */
struct getfsstat_struct {
	user_addr_t	sfsp;		/* next user buffer slot to fill */
	user_addr_t	*mp;		/* next MAC label pointer, or NULL */
	int		count;		/* mounts visited so far */
	int		maxcount;	/* user buffer capacity, in entries */
	int		flags;		/* MNT_NOWAIT / MNT_WAIT / MNT_DWAIT */
	int		error;		/* first error hit by a callback */
};
2597
2598
/*
 * vfs_iterate() callback for getfsstat()/__mac_getfsstat(): emit one
 * statfs record (and, optionally, one MAC label) per mount into the
 * user buffers described by 'arg'.  Mounts beyond the buffer capacity
 * are only counted, not copied.
 */
static int
getfsstat_callback(mount_t mp, void * arg)
{

	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error, my_size;
	vfs_context_t ctx = vfs_context_current();

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the
		 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
			(error = vfs_update_vfsstat(mp, ctx,
			    VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			return(VFS_RETURNED);
		}

		/*
		 * Need to handle LP64 version of struct statfs
		 */
		error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		/* munge_statfs() reported how many bytes it wrote out. */
		fstp->sfsp += my_size;

		if (fstp->mp) {
#if CONFIG_MACF
			error = mac_mount_label_get(mp, *fstp->mp);
			if (error) {
				fstp->error = error;
				return(VFS_RETURNED_DONE);
			}
#endif
			fstp->mp++;
		}
	}
	/* Count every mount, even ones that did not fit in the buffer. */
	fstp->count++;
	return(VFS_RETURNED);
}
2645
2646 /*
2647 * Get statistics on all filesystems.
2648 */
2649 int
2650 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2651 {
2652 struct __mac_getfsstat_args muap;
2653
2654 muap.buf = uap->buf;
2655 muap.bufsize = uap->bufsize;
2656 muap.mac = USER_ADDR_NULL;
2657 muap.macsize = 0;
2658 muap.flags = uap->flags;
2659
2660 return (__mac_getfsstat(p, &muap, retval));
2661 }
2662
2663 /*
2664 * __mac_getfsstat: Get MAC-related file system statistics
2665 *
2666 * Parameters: p (ignored)
2667 * uap User argument descriptor (see below)
2668 * retval Count of file system statistics (N stats)
2669 *
2670 * Indirect: uap->bufsize Buffer size
2671 * uap->macsize MAC info size
2672 * uap->buf Buffer where information will be returned
2673 * uap->mac MAC info
2674 * uap->flags File system flags
2675 *
2676 *
2677 * Returns: 0 Success
2678 * !0 Not success
2679 *
2680 */
2681 int
2682 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2683 {
2684 user_addr_t sfsp;
2685 user_addr_t *mp;
2686 size_t count, maxcount, bufsize, macsize;
2687 struct getfsstat_struct fst;
2688
2689 bufsize = (size_t) uap->bufsize;
2690 macsize = (size_t) uap->macsize;
2691
2692 if (IS_64BIT_PROCESS(p)) {
2693 maxcount = bufsize / sizeof(struct user64_statfs);
2694 }
2695 else {
2696 maxcount = bufsize / sizeof(struct user32_statfs);
2697 }
2698 sfsp = uap->buf;
2699 count = 0;
2700
2701 mp = NULL;
2702
2703 #if CONFIG_MACF
2704 if (uap->mac != USER_ADDR_NULL) {
2705 u_int32_t *mp0;
2706 int error;
2707 unsigned int i;
2708
2709 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2710 if (count != maxcount)
2711 return (EINVAL);
2712
2713 /* Copy in the array */
2714 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2715 if (mp0 == NULL) {
2716 return (ENOMEM);
2717 }
2718
2719 error = copyin(uap->mac, mp0, macsize);
2720 if (error) {
2721 FREE(mp0, M_MACTEMP);
2722 return (error);
2723 }
2724
2725 /* Normalize to an array of user_addr_t */
2726 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2727 if (mp == NULL) {
2728 FREE(mp0, M_MACTEMP);
2729 return (ENOMEM);
2730 }
2731
2732 for (i = 0; i < count; i++) {
2733 if (IS_64BIT_PROCESS(p))
2734 mp[i] = ((user_addr_t *)mp0)[i];
2735 else
2736 mp[i] = (user_addr_t)mp0[i];
2737 }
2738 FREE(mp0, M_MACTEMP);
2739 }
2740 #endif
2741
2742
2743 fst.sfsp = sfsp;
2744 fst.mp = mp;
2745 fst.flags = uap->flags;
2746 fst.count = 0;
2747 fst.error = 0;
2748 fst.maxcount = maxcount;
2749
2750
2751 vfs_iterate(0, getfsstat_callback, &fst);
2752
2753 if (mp)
2754 FREE(mp, M_MACTEMP);
2755
2756 if (fst.error ) {
2757 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2758 return(fst.error);
2759 }
2760
2761 if (fst.sfsp && fst.count > fst.maxcount)
2762 *retval = fst.maxcount;
2763 else
2764 *retval = fst.count;
2765 return (0);
2766 }
2767
/*
 * vfs_iterate() callback for getfsstat64(): emit one struct statfs64
 * per mount into the user buffer described by 'arg'; mounts beyond
 * the buffer capacity are only counted.
 */
static int
getfsstat64_callback(mount_t mp, void * arg)
{
	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error;

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the fsstat
		 * cache. MNT_WAIT overrides MNT_NOWAIT.
		 *
		 * We treat MNT_DWAIT as MNT_WAIT for all instances of
		 * getfsstat, since the constants are out of the same
		 * namespace.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 ||
		    (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
		    (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			return(VFS_RETURNED);
		}

		error = statfs64_common(mp, sp, fstp->sfsp);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		fstp->sfsp += sizeof(struct statfs64);
	}
	/* Count every mount, even ones that did not fit in the buffer. */
	fstp->count++;
	return(VFS_RETURNED);
}
2802
/*
 * Get statistics on all file systems in 64 bit mode.
 */
int
getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
{
	user_addr_t sfsp;
	int count, maxcount;
	struct getfsstat_struct fst;

	/*
	 * NOTE(review): maxcount is an int; a bufsize larger than
	 * INT_MAX entries would truncate here.  Presumably harmless
	 * (the callback simply stops copying), but worth confirming.
	 */
	maxcount = uap->bufsize / sizeof(struct statfs64);

	sfsp = uap->buf;
	count = 0;

	fst.sfsp = sfsp;
	fst.flags = uap->flags;
	fst.count = 0;
	fst.error = 0;
	fst.maxcount = maxcount;

	vfs_iterate(0, getfsstat64_callback, &fst);

	if (fst.error ) {
		KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
		return(fst.error);
	}

	/* More mounts than buffer slots: report the capacity, not the total. */
	if (fst.sfsp && fst.count > fst.maxcount)
		*retval = fst.maxcount;
	else
		*retval = fst.count;

	return (0);
}
2838
2839 /*
 * Gets the vnode associated with the file descriptor passed
 * in as input.
2842 *
2843 * INPUT
2844 * ctx - vfs context of caller
2845 * fd - file descriptor for which vnode is required.
2846 * vpp - Pointer to pointer to vnode to be returned.
2847 *
2848 * The vnode is returned with an iocount so any vnode obtained
2849 * by this call needs a vnode_put
2850 *
2851 */
2852 static int
2853 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2854 {
2855 int error;
2856 vnode_t vp;
2857 struct fileproc *fp;
2858 proc_t p = vfs_context_proc(ctx);
2859
2860 *vpp = NULLVP;
2861
2862 error = fp_getfvp(p, fd, &fp, &vp);
2863 if (error)
2864 return (error);
2865
2866 error = vnode_getwithref(vp);
2867 if (error) {
2868 (void)fp_drop(p, fd, fp, 0);
2869 return (error);
2870 }
2871
2872 (void)fp_drop(p, fd, fp, 0);
2873 *vpp = vp;
2874 return (error);
2875 }
2876
/*
 * Wrapper function around namei to start lookup from a directory
 * specified by a file descriptor ni_dirfd.
 *
 * In addition to all the errors returned by namei, this call can
 * return ENOTDIR if the file descriptor does not refer to a directory,
 * and EBADF if the file descriptor is not valid.
 */
int
nameiat(struct nameidata *ndp, int dirfd)
{
	/*
	 * Only consult dirfd for a fresh, relative lookup that has not
	 * already been given a starting directory (USEDVP) and is not
	 * the continuation of an earlier lookup (NAMEI_CONTLOOKUP).
	 */
	if ((dirfd != AT_FDCWD) &&
	    !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
	    !(ndp->ni_cnd.cn_flags & USEDVP)) {
		int error = 0;
		char c;

		/* Peek at the first byte of the path to detect absolute paths. */
		if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
			error = copyin(ndp->ni_dirp, &c, sizeof(char));
			if (error)
				return (error);
		} else {
			c = *((char *)(ndp->ni_dirp));
		}

		if (c != '/') {
			vnode_t dvp_at;

			error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
			    &dvp_at);
			if (error)
				return (error);

			if (vnode_vtype(dvp_at) != VDIR) {
				vnode_put(dvp_at);
				return (ENOTDIR);
			}

			/* Start the lookup at dvp_at instead of the CWD. */
			ndp->ni_dvp = dvp_at;
			ndp->ni_cnd.cn_flags |= USEDVP;
			error = namei(ndp);
			ndp->ni_cnd.cn_flags &= ~USEDVP;
			vnode_put(dvp_at);
			return (error);
		}
	}

	return (namei(ndp));
}
2926
/*
 * Change current working directory to a given file descriptor.
 *
 * Shared implementation for fchdir() (per_thread == 0) and
 * __pthread_fchdir() (per_thread == 1).  With per_thread set, an fd
 * of -1 clears any thread-local working directory instead.
 */
/* ARGSUSED */
static int
common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	vnode_t vp;
	vnode_t tdp;
	vnode_t tvp;
	struct mount *mp;
	int error;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	if (per_thread && uap->fd == -1) {
		/*
		 * Switching back from per-thread to per process CWD; verify we
		 * in fact have one before proceeding.  The only success case
		 * for this code path is to return 0 preemptively after zapping
		 * the thread structure contents.
		 */
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = NULLVP;
			if (tvp != NULLVP) {
				vnode_rele(tvp);
				return (0);
			}
		}
		return (EBADF);
	}

	if ( (error = file_vnode(uap->fd, &vp)) )
		return(error);
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_chdir(ctx, vp);
	if (error)
		goto out;
#endif
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
	if (error)
		goto out;

	/*
	 * If a filesystem is mounted on this directory, descend to the
	 * root of the topmost mounted filesystem instead.
	 */
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		if (vfs_busy(mp, LK_NOWAIT)) {
			error = EACCES;
			goto out;
		}
		error = VFS_ROOT(mp, &tdp, ctx);
		vfs_unbusy(mp);
		if (error)
			break;
		vnode_put(vp);
		vp = tdp;
	}
	if (error)
		goto out;
	/* Hold the new CWD with a usecount before dropping the iocount. */
	if ( (error = vnode_ref(vp)) )
		goto out;
	vnode_put(vp);

	if (per_thread) {
		/* Install as this thread's private CWD. */
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			vnode_rele(vp);
			return (ENOENT);
		}
	} else {
		/* Install as the process-wide CWD under the fd lock. */
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = vp;
		proc_fdunlock(p);
	}

	/* Release the previous working directory, if there was one. */
	if (tvp)
		vnode_rele(tvp);
	file_drop(uap->fd);

	return (0);
out:
	vnode_put(vp);
	file_drop(uap->fd);

	return(error);
}
3033
int
fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
{
	/* Process-wide chdir to the directory open at uap->fd. */
	return common_fchdir(p, uap, 0);
}
3039
int
__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
{
	/* Per-thread chdir to the directory open at uap->fd. */
	return common_fchdir(p, (void *)uap, 1);
}
3045
/*
 * Change current working directory (".").
 *
 * Shared implementation for chdir() (per_thread == 0) and
 * __pthread_chdir() (per_thread == 1).
 *
 * Returns:	0			Success
 *	change_dir:ENOTDIR
 *	change_dir:???
 *	vnode_ref:ENOENT		No such file or directory
 */
/* ARGSUSED */
static int
common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = change_dir(&nd, ctx);
	if (error)
		return (error);
	/* Hold the new CWD with a usecount before dropping the iocount. */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	/*
	 * drop the iocount we picked up in change_dir
	 */
	vnode_put(nd.ni_vp);

	if (per_thread) {
		/* Install as this thread's private CWD. */
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = nd.ni_vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			vnode_rele(nd.ni_vp);
			return (ENOENT);
		}
	} else {
		/* Install as the process-wide CWD under the fd lock. */
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = nd.ni_vp;
		proc_fdunlock(p);
	}

	/* Release the previous working directory, if there was one. */
	if (tvp)
		vnode_rele(tvp);

	return (0);
}
3101
3102
3103 /*
3104 * chdir
3105 *
3106 * Change current working directory (".") for the entire process
3107 *
3108 * Parameters: p Process requesting the call
3109 * uap User argument descriptor (see below)
3110 * retval (ignored)
3111 *
3112 * Indirect parameters: uap->path Directory path
3113 *
3114 * Returns: 0 Success
3115 * common_chdir: ENOTDIR
3116 * common_chdir: ENOENT No such file or directory
3117 * common_chdir: ???
3118 *
3119 */
int
chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
{
	/* Process-wide chdir; see common_chdir(). */
	return common_chdir(p, (void *)uap, 0);
}
3125
3126 /*
3127 * __pthread_chdir
3128 *
3129 * Change current working directory (".") for a single thread
3130 *
3131 * Parameters: p Process requesting the call
3132 * uap User argument descriptor (see below)
3133 * retval (ignored)
3134 *
3135 * Indirect parameters: uap->path Directory path
3136 *
3137 * Returns: 0 Success
3138 * common_chdir: ENOTDIR
3139 * common_chdir: ENOENT No such file or directory
3140 * common_chdir: ???
3141 *
3142 */
int
__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
{
	/* Per-thread chdir; see common_chdir(). */
	return common_chdir(p, (void *)uap, 1);
}
3148
3149
/*
 * Change notion of root (``/'') directory.
 */
/* ARGSUSED */
int
chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	/* Must be superuser to change the root directory. */
	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
		return (error);

	NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = change_dir(&nd, ctx);
	if (error)
		return (error);

#if CONFIG_MACF
	error = mac_vnode_check_chroot(ctx, nd.ni_vp,
	    &nd.ni_cnd);
	if (error) {
		vnode_put(nd.ni_vp);
		return (error);
	}
#endif

	/* Hold the new root with a usecount before dropping the iocount. */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	vnode_put(nd.ni_vp);

	/* Swap in the new root under the fd lock; release the old one after. */
	proc_fdlock(p);
	tvp = fdp->fd_rdir;
	fdp->fd_rdir = nd.ni_vp;
	fdp->fd_flags |= FD_CHROOT;
	proc_fdunlock(p);

	if (tvp != NULL)
		vnode_rele(tvp);

	return (0);
}
3198
3199 /*
3200 * Common routine for chroot and chdir.
3201 *
3202 * Returns: 0 Success
3203 * ENOTDIR Not a directory
3204 * namei:??? [anything namei can return]
3205 * vnode_authorize:??? [anything vnode_authorize can return]
3206 */
3207 static int
3208 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3209 {
3210 vnode_t vp;
3211 int error;
3212
3213 if ((error = namei(ndp)))
3214 return (error);
3215 nameidone(ndp);
3216 vp = ndp->ni_vp;
3217
3218 if (vp->v_type != VDIR) {
3219 vnode_put(vp);
3220 return (ENOTDIR);
3221 }
3222
3223 #if CONFIG_MACF
3224 error = mac_vnode_check_chdir(ctx, vp);
3225 if (error) {
3226 vnode_put(vp);
3227 return (error);
3228 }
3229 #endif
3230
3231 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3232 if (error) {
3233 vnode_put(vp);
3234 return (error);
3235 }
3236
3237 return (error);
3238 }
3239
/*
 * Allocate the per-fd vnode data (for directories) associated with
 * the file glob.
 */
struct fd_vn_data *
fg_vn_data_alloc(void)
{
	struct fd_vn_data *fvdata;

	/* Allocate per fd vnode data */
	MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
	       M_FD_VN_DATA, M_WAITOK | M_ZERO);
	/* M_WAITOK allocation: fvdata is relied upon to be non-NULL here. */
	lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
	return fvdata;
}
3254
/*
 * Free the vnode data (for directories) associated with the file glob.
 */
void
fg_vn_data_free(void *fgvndata)
{
	struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;

	/* Release the per-fd buffer, if one was attached. */
	if (fvdata->fv_buf)
		FREE(fvdata->fv_buf, M_FD_DIRBUF);
	lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
	FREE(fvdata, M_FD_VN_DATA);
}
3268
3269 /*
3270 * Check permissions, allocate an open file structure,
3271 * and call the device open routine if any.
3272 *
3273 * Returns: 0 Success
3274 * EINVAL
3275 * EINTR
3276 * falloc:ENFILE
3277 * falloc:EMFILE
3278 * falloc:ENOMEM
3279 * vn_open_auth:???
3280 * dupfdopen:???
3281 * VNOP_ADVLOCK:???
3282 * vnode_setsize:???
3283 *
3284 * XXX Need to implement uid, gid
3285 */
int
open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
    int32_t *retval)
{
	proc_t p = vfs_context_proc(ctx);
	uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
	struct fileproc *fp;
	vnode_t vp;
	int flags, oflags;
	int type, indx, error;
	struct flock lf;
	int no_controlling_tty = 0;
	int deny_controlling_tty = 0;
	struct session *sessp = SESSION_NULL;

	oflags = uflags;

	/* O_RDONLY|O_WRONLY|O_RDWR all at once is invalid. */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return(EINVAL);
	flags = FFLAGS(uflags);

	AUDIT_ARG(fflags, oflags);
	AUDIT_ARG(mode, vap->va_mode);

	/* Reserve an fd slot and fileproc before doing the lookup. */
	if ((error = falloc_withalloc(p,
	    &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
		return (error);
	}
	/* Encode the reserved fd for the fdopen() hack detected below. */
	uu->uu_dupfd = -indx - 1;

	if (!(p->p_flag & P_CONTROLT)) {
		sessp = proc_session(p);
		no_controlling_tty = 1;
		/*
		 * If conditions would warrant getting a controlling tty if
		 * the device being opened is a tty (see ttyopen in tty.c),
		 * but the open flags deny it, set a flag in the session to
		 * prevent it.
		 */
		if (SESS_LEADER(p, sessp) &&
		    sessp->s_ttyvp == NULL &&
		    (flags & O_NOCTTY)) {
			session_lock(sessp);
			sessp->s_flags |= S_NOCTTY;
			session_unlock(sessp);
			deny_controlling_tty = 1;
		}
	}

	if ((error = vn_open_auth(ndp, &flags, vap))) {
		if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){	/* XXX from fdopen */
			/* /dev/fd-style open: duplicate an existing descriptor. */
			if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
				fp_drop(p, indx, NULL, 0);
				*retval = indx;
				if (deny_controlling_tty) {
					session_lock(sessp);
					sessp->s_flags &= ~S_NOCTTY;
					session_unlock(sessp);
				}
				if (sessp != SESSION_NULL)
					session_rele(sessp);
				return (0);
			}
		}
		if (error == ERESTART)
			error = EINTR;
		fp_free(p, indx, fp);

		if (deny_controlling_tty) {
			session_lock(sessp);
			sessp->s_flags &= ~S_NOCTTY;
			session_unlock(sessp);
		}
		if (sessp != SESSION_NULL)
			session_rele(sessp);
		return (error);
	}
	uu->uu_dupfd = 0;
	vp = ndp->ni_vp;

	/* Wire the opened vnode into the new fileglob. */
	fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
	fp->f_fglob->fg_ops = &vnops;
	fp->f_fglob->fg_data = (caddr_t)vp;

#if CONFIG_PROTECT
	if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
		if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
			/* Raw encrypted mode requested by the caller. */
			fp->f_fglob->fg_flag |= FENCRYPTED;
		}
	}
#endif

	/* O_EXLOCK/O_SHLOCK: take a whole-file flock-style advisory lock. */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
#if CONFIG_MACF
		error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
		    F_SETLK, &lf);
		if (error)
			goto bad;
#endif
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
			goto bad;
		fp->f_fglob->fg_flag |= FHASLOCK;
	}

	/* try to truncate by setting the size attribute */
	if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
		goto bad;

	/*
	 * If the open flags denied the acquisition of a controlling tty,
	 * clear the flag in the session structure that prevented the lower
	 * level code from assigning one.
	 */
	if (deny_controlling_tty) {
		session_lock(sessp);
		sessp->s_flags &= ~S_NOCTTY;
		session_unlock(sessp);
	}

	/*
	 * If a controlling tty was set by the tty line discipline, then we
	 * want to set the vp of the tty into the session structure. We have
	 * a race here because we can't get to the vp for the tp in ttyopen,
	 * because it's not passed as a parameter in the open path.
	 */
	if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
		vnode_t ttyvp;

		/*
		 * NOTE(review): ttyvp receives the previous s_ttyvp but is
		 * never used or released afterwards — confirm whether the
		 * old value requires a reference drop.
		 */
		session_lock(sessp);
		ttyvp = sessp->s_ttyvp;
		sessp->s_ttyvp = vp;
		sessp->s_ttyvid = vnode_vid(vp);
		session_unlock(sessp);
	}

	/*
	 * For directories we hold some additional information in the fd.
	 */
	if (vnode_vtype(vp) == VDIR) {
		fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
	} else {
		fp->f_fglob->fg_vn_data = NULL;
	}

	vnode_put(vp);

	/* Publish the fd: apply close-on-exec/fork flags and release the slot. */
	proc_fdlock(p);
	if (flags & O_CLOEXEC)
		*fdflags(p, indx) |= UF_EXCLOSE;
	if (flags & O_CLOFORK)
		*fdflags(p, indx) |= UF_FORKCLOSE;
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;

	if (sessp != SESSION_NULL)
		session_rele(sessp);
	return (0);
bad:
	if (deny_controlling_tty) {
		session_lock(sessp);
		sessp->s_flags &= ~S_NOCTTY;
		session_unlock(sessp);
	}
	if (sessp != SESSION_NULL)
		session_rele(sessp);

	/* Close with the credentials the file was opened with. */
	struct vfs_context context = *vfs_context_current();
	context.vc_ucred = fp->f_fglob->fg_cred;

	/* Undo any advisory lock taken above before closing. */
	if ((fp->f_fglob->fg_flag & FHASLOCK) &&
	    (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;

		(void)VNOP_ADVLOCK(
		    vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
	}

	vn_close(vp, fp->f_fglob->fg_flag, &context);
	vnode_put(vp);
	fp_free(p, indx, fp);

	return (error);
}
3486
/*
 * While most of the *at syscall handlers can call nameiat() which
 * is a wrapper around namei, the use of namei and initialisation
 * of nameidata are far removed and in different functions - namei
 * gets called in vn_open_auth for open1. So we'll just do here what
 * nameiat() does.
 */
static int
open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
    int dirfd)
{
	/* Only consult dirfd for a relative path with no preset start dir. */
	if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
		int error;
		char c;

		/* Peek at the first byte of the path to detect absolute paths. */
		if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
			error = copyin(ndp->ni_dirp, &c, sizeof(char));
			if (error)
				return (error);
		} else {
			c = *((char *)(ndp->ni_dirp));
		}

		if (c != '/') {
			vnode_t dvp_at;

			error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
			    &dvp_at);
			if (error)
				return (error);

			if (vnode_vtype(dvp_at) != VDIR) {
				vnode_put(dvp_at);
				return (ENOTDIR);
			}

			/*
			 * Start the lookup at dvp_at instead of the CWD.
			 * NOTE(review): unlike nameiat(), USEDVP is not
			 * cleared after open1() returns — confirm callers
			 * reinitialize ndp before reuse.
			 */
			ndp->ni_dvp = dvp_at;
			ndp->ni_cnd.cn_flags |= USEDVP;
			error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
			    retval);
			vnode_put(dvp_at);
			return (error);
		}
	}

	return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
}
3535
3536 /*
3537 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3538 *
3539 * Parameters: p Process requesting the open
3540 * uap User argument descriptor (see below)
3541 * retval Pointer to an area to receive the
 * return value from the system call
3543 *
3544 * Indirect: uap->path Path to open (same as 'open')
3545 * uap->flags Flags to open (same as 'open'
3546 * uap->uid UID to set, if creating
3547 * uap->gid GID to set, if creating
3548 * uap->mode File mode, if creating (same as 'open')
3549 * uap->xsecurity ACL to set, if creating
3550 *
3551 * Returns: 0 Success
3552 * !0 errno value
3553 *
3554 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3555 *
 * XXX: We should enumerate the possible errno values here, and where
3557 * in the code they originated.
3558 */
3559 int
3560 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3561 {
3562 struct filedesc *fdp = p->p_fd;
3563 int ciferror;
3564 kauth_filesec_t xsecdst;
3565 struct vnode_attr va;
3566 struct nameidata nd;
3567 int cmode;
3568
3569 AUDIT_ARG(owner, uap->uid, uap->gid);
3570
3571 xsecdst = NULL;
3572 if ((uap->xsecurity != USER_ADDR_NULL) &&
3573 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3574 return ciferror;
3575
3576 VATTR_INIT(&va);
3577 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3578 VATTR_SET(&va, va_mode, cmode);
3579 if (uap->uid != KAUTH_UID_NONE)
3580 VATTR_SET(&va, va_uid, uap->uid);
3581 if (uap->gid != KAUTH_GID_NONE)
3582 VATTR_SET(&va, va_gid, uap->gid);
3583 if (xsecdst != NULL)
3584 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3585
3586 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3587 uap->path, vfs_context_current());
3588
3589 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3590 fileproc_alloc_init, NULL, retval);
3591 if (xsecdst != NULL)
3592 kauth_filesec_free(xsecdst);
3593
3594 return ciferror;
3595 }
3596
3597 /*
3598 * Go through the data-protected atomically controlled open (2)
3599 *
3600 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3601 */
3602 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3603 int flags = uap->flags;
3604 int class = uap->class;
3605 int dpflags = uap->dpflags;
3606
3607 /*
3608 * Follow the same path as normal open(2)
3609 * Look up the item if it exists, and acquire the vnode.
3610 */
3611 struct filedesc *fdp = p->p_fd;
3612 struct vnode_attr va;
3613 struct nameidata nd;
3614 int cmode;
3615 int error;
3616
3617 VATTR_INIT(&va);
3618 /* Mask off all but regular access permissions */
3619 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3620 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3621
3622 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3623 uap->path, vfs_context_current());
3624
3625 /*
3626 * Initialize the extra fields in vnode_attr to pass down our
3627 * extra fields.
3628 * 1. target cprotect class.
3629 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3630 */
3631 if (flags & O_CREAT) {
3632 VATTR_SET(&va, va_dataprotect_class, class);
3633 }
3634
3635 if (dpflags & O_DP_GETRAWENCRYPTED) {
3636 if ( flags & (O_RDWR | O_WRONLY)) {
3637 /* Not allowed to write raw encrypted bytes */
3638 return EINVAL;
3639 }
3640 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3641 }
3642
3643 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3644 fileproc_alloc_init, NULL, retval);
3645
3646 return error;
3647 }
3648
3649 static int
3650 openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3651 int fd, enum uio_seg segflg, int *retval)
3652 {
3653 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
3654 struct vnode_attr va;
3655 struct nameidata nd;
3656 int cmode;
3657
3658 VATTR_INIT(&va);
3659 /* Mask off all but regular access permissions */
3660 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3661 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3662
3663 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3664 segflg, path, ctx);
3665
3666 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3667 retval, fd));
3668 }
3669
3670 int
3671 open(proc_t p, struct open_args *uap, int32_t *retval)
3672 {
3673 __pthread_testcancel(1);
3674 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3675 }
3676
3677 int
3678 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3679 int32_t *retval)
3680 {
3681 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3682 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3683 }
3684
3685 int
3686 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3687 int32_t *retval)
3688 {
3689 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3690 uap->mode, uap->fd, UIO_USERSPACE, retval));
3691 }
3692
3693 int
3694 openat(proc_t p, struct openat_args *uap, int32_t *retval)
3695 {
3696 __pthread_testcancel(1);
3697 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3698 }
3699
3700 /*
3701 * openbyid_np: open a file given a file system id and a file system object id
3702 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
 * for file systems that don't support object ids, it is a node id (uint64_t).
3704 *
3705 * Parameters: p Process requesting the open
3706 * uap User argument descriptor (see below)
3707 * retval Pointer to an area to receive the
 * return value from the system call
3709 *
3710 * Indirect: uap->path Path to open (same as 'open')
3711 *
3712 * uap->fsid id of target file system
3713 * uap->objid id of target file system object
3714 * uap->flags Flags to open (same as 'open')
3715 *
3716 * Returns: 0 Success
3717 * !0 errno value
3718 *
3719 *
 * XXX: We should enumerate the possible errno values here, and where
3721 * in the code they originated.
3722 */
3723 int
3724 openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3725 {
3726 fsid_t fsid;
3727 uint64_t objid;
3728 int error;
3729 char *buf = NULL;
3730 int buflen = MAXPATHLEN;
3731 int pathlen = 0;
3732 vfs_context_t ctx = vfs_context_current();
3733
3734 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3735 return (error);
3736 }
3737
3738 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3739 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3740 return (error);
3741 }
3742
3743 AUDIT_ARG(value32, fsid.val[0]);
3744 AUDIT_ARG(value64, objid);
3745
3746 /*resolve path from fsis, objid*/
3747 do {
3748 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3749 if (buf == NULL) {
3750 return (ENOMEM);
3751 }
3752
3753 error = fsgetpath_internal(
3754 ctx, fsid.val[0], objid,
3755 buflen, buf, &pathlen);
3756
3757 if (error) {
3758 FREE(buf, M_TEMP);
3759 buf = NULL;
3760 }
3761 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3762
3763 if (error) {
3764 return error;
3765 }
3766
3767 buf[pathlen] = 0;
3768
3769 error = openat_internal(
3770 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3771
3772 FREE(buf, M_TEMP);
3773
3774 return error;
3775 }
3776
3777
3778 /*
3779 * Create a special file.
3780 */
3781 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3782
/*
 * mknod: create a device special file (block, character, or "bad" node),
 * or a FIFO via mkfifo1().
 *
 * Parameters:	p	Process requesting the node creation
 *		uap	User argument descriptor: path, mode (S_IFMT type
 *			bits + permissions), and dev (device number)
 *		retval	(Ignored)
 *
 * Returns:	0	Success
 *		!0	errno value
 *
 * Notes:	Requires superuser privilege except for the FIFO case,
 *		which is handed off to mkfifo1() before the suser() check.
 */
int
mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	vnode_t	vp, dvp;

	VATTR_INIT(&va);
	/* Requested permissions, filtered through the process umask. */
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	VATTR_SET(&va, va_rdev, uap->dev);

	/* If it's a mknod() of a FIFO, call mkfifo1() instead */
	if ((uap->mode & S_IFMT) == S_IFIFO)
		return(mkfifo1(ctx, uap->path, &va));

	AUDIT_ARG(mode, uap->mode);
	AUDIT_ARG(value32, uap->dev);

	/* Only the superuser may create device nodes. */
	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		return (error);
	NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
		UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* The target must not already exist. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	/* Map the S_IFMT type bits onto a vnode type. */
	switch (uap->mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		VATTR_SET(&va, va_type, VBAD);
		break;
	case S_IFCHR:
		VATTR_SET(&va, va_type, VCHR);
		break;
	case S_IFBLK:
		VATTR_SET(&va, va_type, VBLK);
		break;
	default:
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);
	if (error)
		goto out;
#endif

	/* The caller must be allowed to add entries to the parent dir. */
	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
		goto out;

	if (vp) {
		int	update_flags = 0;

		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return (error);
}
3878
3879 /*
3880 * Create a named pipe.
3881 *
3882 * Returns: 0 Success
3883 * EEXIST
3884 * namei:???
3885 * vnode_authorize:???
3886 * vn_create:???
3887 */
/*
 * FIFO-creation core shared by mkfifo(2), mkfifo_extended(2) and the
 * S_IFIFO case of mknod(2).  'vap' carries the caller-prepared creation
 * attributes; va_type is forced to VFIFO here.  'upath' is always a
 * user-space address (lookup uses UIO_USERSPACE).
 */
static int
mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
{
	vnode_t	vp, dvp;
	int error;
	struct nameidata nd;

	NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
		UIO_USERSPACE, upath, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* check that this is a new file and authorize addition */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}
	VATTR_SET(vap, va_type, VFIFO);

	if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
		goto out;

	error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	/* vn_create may have returned a vnode even on error; drop both refs. */
	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return error;
}
3927
3928
3929 /*
3930 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3931 *
3932 * Parameters: p Process requesting the open
3933 * uap User argument descriptor (see below)
3934 * retval (Ignored)
3935 *
3936 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3937 * uap->uid UID to set
3938 * uap->gid GID to set
3939 * uap->mode File mode to set (same as 'mkfifo')
3940 * uap->xsecurity ACL to set, if creating
3941 *
3942 * Returns: 0 Success
3943 * !0 errno value
3944 *
3945 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3946 *
 * XXX: We should enumerate the possible errno values here, and where
3948 * in the code they originated.
3949 */
3950 int
3951 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3952 {
3953 int ciferror;
3954 kauth_filesec_t xsecdst;
3955 struct vnode_attr va;
3956
3957 AUDIT_ARG(owner, uap->uid, uap->gid);
3958
3959 xsecdst = KAUTH_FILESEC_NONE;
3960 if (uap->xsecurity != USER_ADDR_NULL) {
3961 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3962 return ciferror;
3963 }
3964
3965 VATTR_INIT(&va);
3966 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3967 if (uap->uid != KAUTH_UID_NONE)
3968 VATTR_SET(&va, va_uid, uap->uid);
3969 if (uap->gid != KAUTH_GID_NONE)
3970 VATTR_SET(&va, va_gid, uap->gid);
3971 if (xsecdst != KAUTH_FILESEC_NONE)
3972 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3973
3974 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3975
3976 if (xsecdst != KAUTH_FILESEC_NONE)
3977 kauth_filesec_free(xsecdst);
3978 return ciferror;
3979 }
3980
3981 /* ARGSUSED */
3982 int
3983 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3984 {
3985 struct vnode_attr va;
3986
3987 VATTR_INIT(&va);
3988 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3989
3990 return(mkfifo1(vfs_context_current(), uap->path, &va));
3991 }
3992
3993
3994 static char *
3995 my_strrchr(char *p, int ch)
3996 {
3997 char *save;
3998
3999 for (save = NULL;; ++p) {
4000 if (*p == ch)
4001 save = p;
4002 if (!*p)
4003 return(save);
4004 }
4005 /* NOTREACHED */
4006 }
4007
4008 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4009
/*
 * Best-effort path reconstruction for fsevents / kauth notification.
 *
 * Builds "<path-of-dvp>/<leafname>" into 'path' (a buffer of '_len'
 * bytes; assumed to be at least MAXPATHLEN -- TODO confirm all callers).
 * Never fails: if the full path cannot be obtained, *truncated_path is
 * set to 1 and the closest obtainable ancestor path (possibly just the
 * mount point or "/") is written instead.
 *
 * Returns the length of the string written, including the NUL.
 */
int
safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
{
	int ret, len = _len;

	*truncated_path = 0;
	ret = vn_getpath(dvp, path, &len);
	if (ret == 0 && len < (MAXPATHLEN - 1)) {
		if (leafname) {
			/* Overwrite the NUL left by vn_getpath with the
			 * separator, then append the leaf component. */
			path[len-1] = '/';
			len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
			if (len > MAXPATHLEN) {
				char *ptr;

				// the string got truncated!
				*truncated_path = 1;
				ptr = my_strrchr(path, '/');
				if (ptr) {
					*ptr = '\0';   // chop off the string at the last directory component
				}
				len = strlen(path) + 1;
			}
		}
	} else if (ret == 0) {
		/* Got a path but no room to append the leaf: report truncation. */
		*truncated_path = 1;
	} else if (ret != 0) {
		struct vnode *mydvp=dvp;

		if (ret != ENOSPC) {
			printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
			       dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
		}
		*truncated_path = 1;

		/* Walk up the parent chain until some ancestor's path fits. */
		do {
			if (mydvp->v_parent != NULL) {
				mydvp = mydvp->v_parent;
			} else if (mydvp->v_mount) {
				strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
				break;
			} else {
				// no parent and no mount point?  only thing is to punt and say "/" changed
				strlcpy(path, "/", _len);
				len = 2;
				mydvp = NULL;
			}

			if (mydvp == NULL) {
				break;
			}

			len = _len;
			ret = vn_getpath(mydvp, path, &len);
		} while (ret == ENOSPC);
	}

	return len;
}
4068
4069
4070 /*
4071 * Make a hard file link.
4072 *
4073 * Returns: 0 Success
4074 * EPERM
4075 * EEXIST
4076 * EXDEV
4077 * namei:???
4078 * vnode_authorize:???
4079 * VNOP_LINK:???
4080 */
4081 /* ARGSUSED */
/*
 * Shared core of link(2)/linkat(2): create a hard link at (fd2, 'link')
 * pointing at the object named by (fd1, 'path').  'flag' honors
 * AT_SYMLINK_FOLLOW for the source lookup.  Returns 0 or an errno.
 */
static int
linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
    user_addr_t link, int flag, enum uio_seg segflg)
{
	vnode_t	vp, dvp, lvp;
	struct nameidata nd;
	int follow;
	int error;
#if CONFIG_FSE
	fse_info finfo;
#endif
	int need_event, has_listeners;
	char *target_path = NULL;
	int truncated=0;

	vp = dvp = lvp = NULLVP;

	/* look up the object we are linking to */
	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
	NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
	    segflg, path, ctx);

	error = nameiat(&nd, fd1);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/*
	 * Normally, linking to directories is not supported.
	 * However, some file systems may have limited support.
	 */
	if (vp->v_type == VDIR) {
		if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
			error = EPERM;   /* POSIX */
			goto out;
		}
		/* Linking to a directory requires ownership. */
		if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
			struct vnode_attr dva;

			VATTR_INIT(&dva);
			VATTR_WANTED(&dva, va_uid);
			if (vnode_getattr(vp, &dva, ctx) != 0 ||
			    !VATTR_IS_SUPPORTED(&dva, va_uid) ||
			    (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
				error = EACCES;
				goto out;
			}
		}
	}

	/*
	 * lookup the target node: the nameidata is reused for a second
	 * lookup, re-armed as a CREATE operation on the link path.
	 */
#if CONFIG_TRIGGERS
	nd.ni_op = OP_LINK;
#endif
	nd.ni_cnd.cn_nameiop = CREATE;
	nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
	nd.ni_dirp = link;
	error = nameiat(&nd, fd2);
	if (error != 0)
		goto out;
	dvp = nd.ni_dvp;
	lvp = nd.ni_vp;

#if CONFIG_MACF
	if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
		goto out2;
#endif

	/* or to anything that kauth doesn't want us to (eg. immutable items) */
	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
		goto out2;

	/* target node must not exist */
	if (lvp != NULLVP) {
		error = EEXIST;
		goto out2;
	}
	/* cannot link across mountpoints */
	if (vnode_mount(vp) != vnode_mount(dvp)) {
		error = EXDEV;
		goto out2;
	}

	/* authorize creation of the target node */
	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out2;

	/* and finally make the link */
	error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
	if (error)
		goto out2;

#if CONFIG_MACF
	(void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
#endif

#if CONFIG_FSE
	need_event = need_fsevent(FSE_CREATE_FILE, dvp);
#else
	need_event = 0;
#endif
	has_listeners = kauth_authorize_fileop_has_listeners();

	/* Post-hoc notification: fsevents and/or kauth fileop listeners. */
	if (need_event || has_listeners) {
		char *link_to_path = NULL;
		int len, link_name_len;

		/* build the path to the new link file */
		GET_PATH(target_path);
		if (target_path == NULL) {
			error = ENOMEM;
			goto out2;
		}

		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);

		if (has_listeners) {
			/* build the path to file we are linking to */
			GET_PATH(link_to_path);
			if (link_to_path == NULL) {
				error = ENOMEM;
				goto out2;
			}

			link_name_len = MAXPATHLEN;
			if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
				/*
				 * Call out to allow 3rd party notification of rename.
				 * Ignore result of kauth_authorize_fileop call.
				 */
				kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
						       (uintptr_t)link_to_path,
						       (uintptr_t)target_path);
			}
			if (link_to_path != NULL) {
				RELEASE_PATH(link_to_path);
			}
		}
#if CONFIG_FSE
		if (need_event) {
			/* construct fsevent */
			if (get_fse_info(vp, &finfo, ctx) == 0) {
				if (truncated) {
					finfo.mode |= FSE_TRUNCATED_PATH;
				}

				// build the path to the destination of the link
				add_fsevent(FSE_CREATE_FILE, ctx,
					    FSE_ARG_STRING, len, target_path,
					    FSE_ARG_FINFO, &finfo,
					    FSE_ARG_DONE);
			}
			if (vp->v_parent) {
				add_fsevent(FSE_STAT_CHANGED, ctx,
					    FSE_ARG_VNODE, vp->v_parent,
					    FSE_ARG_DONE);
			}
		}
#endif
	}
out2:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);
	if (target_path != NULL) {
		RELEASE_PATH(target_path);
	}
out:
	if (lvp)
		vnode_put(lvp);
	if (dvp)
		vnode_put(dvp);
	vnode_put(vp);
	return (error);
}
4262
4263 int
4264 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4265 {
4266 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4267 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4268 }
4269
4270 int
4271 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4272 {
4273 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4274 return (EINVAL);
4275
4276 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4277 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4278 }
4279
4280 /*
4281 * Make a symbolic link.
4282 *
4283 * We could add support for ACLs here too...
4284 */
4285 /* ARGSUSED */
/*
 * Shared core of symlink(2)/symlinkat(2): create a symlink at (fd, 'link')
 * whose contents are the string at 'path_data'.  For user-space callers
 * the link string is copied into a MAXPATHLEN zone buffer (freed at
 * 'out'); kernel-space callers pass the string directly.
 */
static int
symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
    user_addr_t link, enum uio_seg segflg)
{
	struct vnode_attr va;
	char *path;
	int error;
	struct nameidata nd;
	vnode_t	vp, dvp;
	uint32_t dfflags;	// Directory file flags
	size_t dummy=0;
	proc_t p;

	error = 0;
	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
		error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
	} else {
		path = (char *)path_data;
	}
	if (error)
		goto out;
	AUDIT_ARG(text, path);	/* This is the link string */

	NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
	    segflg, link, ctx);

	error = nameiat(&nd, fd);
	if (error)
		goto out;
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* Symlinks get ACCESSPERMS filtered through the umask. */
	p = vfs_context_proc(ctx);
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VLNK);
	VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);

	/*
	 * Handle inheritance of restricted flag
	 */
	error = vnode_flags(dvp, &dfflags, ctx);
	if (error)
		goto skipit;
	if (dfflags & SF_RESTRICTED)
		VATTR_SET(&va, va_flags, SF_RESTRICTED);

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    dvp, &nd.ni_cnd, &va);
#endif
	if (error != 0) {
		goto skipit;
	}

	if (vp != NULL) {
		error = EEXIST;
		goto skipit;
	}

	/*
	 * The following steps funnel through the same 'error' variable:
	 * each runs only if everything before it succeeded.
	 */
	/* authorize */
	if (error == 0)
		error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
	/* get default ownership, etc. */
	if (error == 0)
		error = vnode_authattr_new(dvp, &va, 0, ctx);
	if (error == 0)
		error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);

#if CONFIG_MACF
	if (error == 0)
		error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
#endif

	/* do fallback attribute handling */
	if (error == 0)
		error = vnode_setattr_fallback(vp, &va, ctx);

	if (error == 0) {
		int update_flags = 0;

		/* VNOP_SYMLINK is not required to return the new vnode;
		 * if it didn't, look it up so identity/fsevents work. */
		if (vp == NULL) {
			nd.ni_cnd.cn_nameiop = LOOKUP;
#if CONFIG_TRIGGERS
			nd.ni_op = OP_LOOKUP;
#endif
			nd.ni_cnd.cn_flags = 0;
			error = nameiat(&nd, fd);
			vp = nd.ni_vp;

			if (vp == NULL)
				goto skipit;
		}

#if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
		/* call out to allow 3rd party notification of rename.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (kauth_authorize_fileop_has_listeners() &&
		    namei(&nd) == 0) {
			char *new_link_path = NULL;
			int len;

			/* build the path to the new link file */
			new_link_path = get_pathbuff();
			len = MAXPATHLEN;
			vn_getpath(dvp, new_link_path, &len);
			if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
				new_link_path[len - 1] = '/';
				strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
			}

			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
					   (uintptr_t)path, (uintptr_t)new_link_path);
			if (new_link_path != NULL)
				release_pathbuff(new_link_path);
		}
#endif
		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

skipit:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);
out:
	/* Free the copied-in link string (user-space callers only). */
	if (path && (path != (char *)path_data))
		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);

	return (error);
}
4436
4437 int
4438 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4439 {
4440 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4441 uap->link, UIO_USERSPACE));
4442 }
4443
4444 int
4445 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4446 __unused int32_t *retval)
4447 {
4448 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4449 uap->path2, UIO_USERSPACE));
4450 }
4451
4452 /*
4453 * Delete a whiteout from the filesystem.
4454 * No longer supported.
4455 */
4456 int
4457 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4458 {
4459 return (ENOTSUP);
4460 }
4461
4462 /*
4463 * Delete a name from the filesystem.
4464 */
4465 /* ARGSUSED */
/*
 * Shared core of unlink(2)/unlinkat(2)/delete(2) and the kernel-internal
 * unlink1().  Removes the object named by (fd, 'path_arg'), optionally
 * anchored at 'start_dvp' (which overrides 'fd').  'unlink_flags' carries
 * VNODE_REMOVE_* modifiers.  Supports compound-remove filesystems and
 * redrives the lookup on ENOENT races (bounded by
 * MAX_AUTHORIZE_ENOENT_RETRIES).
 */
static int
unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
    user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
{
	struct nameidata nd;
	vnode_t	vp, dvp;
	int error;
	struct componentname *cnp;
	char  *path = NULL;
	int  len=0;
#if CONFIG_FSE
	fse_info  finfo;
	struct vnode_attr va;
#endif
	int flags;
	int need_event;
	int has_listeners;
	int truncated_path;
	int batched;
	struct vnode_attr *vap;
	int do_retry;
	int retry_count = 0;
	int cn_flags;

	cn_flags = LOCKPARENT;
	if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
		cn_flags |= AUDITVNPATH1;
	/* If a starting dvp is passed, it trumps any fd passed. */
	if (start_dvp)
		cn_flags |= USEDVP;

#if NAMEDRSRCFORK
	/* unlink or delete is allowed on rsrc forks and named streams */
	cn_flags |= CN_ALLOWRSRCFORK;
#endif

retry:
	/* Per-attempt state is reset here; 'path' survives across retries. */
	do_retry = 0;
	flags = 0;
	need_event = 0;
	has_listeners = 0;
	truncated_path = 0;
	vap = NULL;

	NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);

	nd.ni_dvp = start_dvp;
	nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
	cnp = &nd.ni_cnd;

lookup_continue:
	error = nameiat(&nd, fd);
	if (error)
		return (error);

	dvp = nd.ni_dvp;
	vp = nd.ni_vp;


	/* With Carbon delete semantics, busy files cannot be deleted */
	if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
		flags |= VNODE_REMOVE_NODELETEBUSY;
	}

	/* Skip any potential upcalls if told to. */
	if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
		flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
	}

	if (vp) {
		batched = vnode_compound_remove_available(vp);
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 */
		if (vp->v_flag & VROOT) {
			error = EBUSY;
		}

		if (!batched) {
			error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
			if (error) {
				/* Racing hardlink lookup may yield a stale
				 * ENOENT; redrive a bounded number of times. */
				if (error == ENOENT &&
				    retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
					do_retry = 1;
					retry_count++;
				}
				goto out;
			}
		}
	} else {
		/* No vp: filesystem must support compound remove (lookup
		 * and removal happen in one VNOP). */
		batched = 1;

		if (!vnode_compound_remove_available(dvp)) {
			panic("No vp, but no compound remove?");
		}
	}

#if CONFIG_FSE
	need_event = need_fsevent(FSE_DELETE, dvp);
	if (need_event) {
		if (!batched) {
			if ((vp->v_flag & VISHARDLINK) == 0) {
				/* XXX need to get these data in batched VNOP */
				get_fse_info(vp, &finfo, ctx);
			}
		} else {
			error = vfs_get_notify_attributes(&va);
			if (error) {
				goto out;
			}

			vap = &va;
		}
	}
#endif
	has_listeners = kauth_authorize_fileop_has_listeners();
	if (need_event || has_listeners) {
		if (path == NULL) {
			GET_PATH(path);
			if (path == NULL) {
				error = ENOMEM;
				goto out;
			}
		}
		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
	}

#if NAMEDRSRCFORK
	if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
		error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
	else
#endif
	{
		error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
		vp = nd.ni_vp;
		if (error == EKEEPLOOKING) {
			/* Compound remove needs another lookup pass. */
			if (!batched) {
				panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
			}

			if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
				panic("EKEEPLOOKING, but continue flag not set?");
			}

			if (vnode_isdir(vp)) {
				error = EISDIR;
				goto out;
			}
			goto lookup_continue;
		} else if (error == ENOENT && batched &&
		    retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
			/*
			 * For compound VNOPs, the authorization callback may
			 * return ENOENT in case of racing hardlink lookups
			 * hitting the name cache, redrive the lookup.
			 */
			do_retry = 1;
			retry_count += 1;
			goto out;
		}
	}

	/*
	 * Call out to allow 3rd party notification of delete.
	 * Ignore result of kauth_authorize_fileop call.
	 */
	if (!error) {
		if (has_listeners) {
			kauth_authorize_fileop(vfs_context_ucred(ctx),
				KAUTH_FILEOP_DELETE,
				(uintptr_t)vp,
				(uintptr_t)path);
		}

		if (vp->v_flag & VISHARDLINK) {
			//
			// if a hardlink gets deleted we want to blow away the
			// v_parent link because the path that got us to this
			// instance of the link is no longer valid.  this will
			// force the next call to get the path to ask the file
			// system instead of just following the v_parent link.
			//
			vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
		}

#if CONFIG_FSE
		if (need_event) {
			if (vp->v_flag & VISHARDLINK) {
				get_fse_info(vp, &finfo, ctx);
			} else if (vap) {
				vnode_get_fse_info_from_vap(vp, &finfo, vap);
			}
			if (truncated_path) {
				finfo.mode |= FSE_TRUNCATED_PATH;
			}
			add_fsevent(FSE_DELETE, ctx,
				FSE_ARG_STRING, len, path,
				FSE_ARG_FINFO, &finfo,
				FSE_ARG_DONE);
		}
#endif
	}

out:
	if (path != NULL)
		RELEASE_PATH(path);

#if NAMEDRSRCFORK
	/* recycle the deleted rsrc fork vnode to force a reclaim, which
	 * will cause its shadow file to go away if necessary.
	 */
	if (vp && (vnode_isnamedstream(vp)) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp)) {
		vnode_recycle(vp);
	}
#endif
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);
	vnode_put(dvp);
	if (vp) {
		vnode_put(vp);
	}

	if (do_retry) {
		goto retry;
	}

	return (error);
}
4699
4700 int
4701 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4702 enum uio_seg segflg, int unlink_flags)
4703 {
4704 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4705 unlink_flags));
4706 }
4707
4708 /*
4709 * Delete a name from the filesystem using Carbon semantics.
4710 */
4711 int
4712 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4713 {
4714 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4715 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
4716 }
4717
4718 /*
4719 * Delete a name from the filesystem using POSIX semantics.
4720 */
4721 int
4722 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4723 {
4724 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4725 uap->path, UIO_USERSPACE, 0));
4726 }
4727
4728 int
4729 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4730 {
4731 if (uap->flag & ~AT_REMOVEDIR)
4732 return (EINVAL);
4733
4734 if (uap->flag & AT_REMOVEDIR)
4735 return (rmdirat_internal(vfs_context_current(), uap->fd,
4736 uap->path, UIO_USERSPACE));
4737 else
4738 return (unlinkat_internal(vfs_context_current(), uap->fd,
4739 NULLVP, uap->path, UIO_USERSPACE, 0));
4740 }
4741
/*
 * Reposition read/write file offset.
 *
 * Takes an iocount on the vnode for the duration of the operation and
 * updates fp->f_fglob->fg_offset on success; the new offset is returned
 * to the caller via *retval.
 */
int
lseek(proc_t p, struct lseek_args *uap, off_t *retval)
{
	struct fileproc *fp;
	vnode_t vp;
	struct vfs_context *ctx;
	off_t offset = uap->offset, file_size;
	int error;

	/*
	 * fp_getfvp() fails with ENOTSUP when the fd does not refer to a
	 * vnode (e.g. a socket); POSIX wants ESPIPE for non-seekable
	 * descriptors, so translate.
	 */
	if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
		if (error == ENOTSUP)
			return (ESPIPE);
		return (error);
	}
	/* FIFOs are never seekable. */
	if (vnode_isfifo(vp)) {
		file_drop(uap->fd);
		return(ESPIPE);
	}


	ctx = vfs_context_current();
#if CONFIG_MACF
	/*
	 * lseek(fd, 0, SEEK_CUR) only queries the current offset without
	 * changing it, so it gets the weaker "get offset" MAC check;
	 * anything else is a "change offset" operation.
	 */
	if (uap->whence == L_INCR && uap->offset == 0)
		error = mac_file_check_get_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	else
		error = mac_file_check_change_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	/* Compute the candidate absolute offset according to 'whence'. */
	switch (uap->whence) {
	case L_INCR:
		offset += fp->f_fglob->fg_offset;
		break;
	case L_XTND:
		if ((error = vnode_size(vp, &file_size, ctx)) != 0)
			break;
		offset += file_size;
		break;
	case L_SET:
		break;
	default:
		error = EINVAL;
	}
	if (error == 0) {
		/*
		 * NOTE(review): this overflow test relies on the signed
		 * off_t addition above wrapping to a negative value —
		 * technically undefined behavior in strict C; assumes the
		 * kernel is built with wrapping signed arithmetic.
		 */
		if (uap->offset > 0 && offset < 0) {
			/* Incremented/relative move past max size */
			error = EOVERFLOW;
		} else {
			/*
			 * Allow negative offsets on character devices, per
			 * POSIX 1003.1-2001. Most likely for writing disk
			 * labels.
			 */
			if (offset < 0 && vp->v_type != VCHR) {
				/* Decremented/relative move before start */
				error = EINVAL;
			} else {
				/* Success */
				fp->f_fglob->fg_offset = offset;
				*retval = fp->f_fglob->fg_offset;
			}
		}
	}

	/*
	 * An lseek can affect whether data is "available to read." Use
	 * hint of NOTE_NONE so no EVFILT_VNODE events fire
	 */
	post_event_if_success(vp, error, NOTE_NONE);
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
4827
4828
4829 /*
4830 * Check access permissions.
4831 *
4832 * Returns: 0 Success
4833 * vnode_authorize:???
4834 */
4835 static int
4836 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4837 {
4838 kauth_action_t action;
4839 int error;
4840
4841 /*
4842 * If just the regular access bits, convert them to something
4843 * that vnode_authorize will understand.
4844 */
4845 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4846 action = 0;
4847 if (uflags & R_OK)
4848 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4849 if (uflags & W_OK) {
4850 if (vnode_isdir(vp)) {
4851 action |= KAUTH_VNODE_ADD_FILE |
4852 KAUTH_VNODE_ADD_SUBDIRECTORY;
4853 /* might want delete rights here too */
4854 } else {
4855 action |= KAUTH_VNODE_WRITE_DATA;
4856 }
4857 }
4858 if (uflags & X_OK) {
4859 if (vnode_isdir(vp)) {
4860 action |= KAUTH_VNODE_SEARCH;
4861 } else {
4862 action |= KAUTH_VNODE_EXECUTE;
4863 }
4864 }
4865 } else {
4866 /* take advantage of definition of uflags */
4867 action = uflags >> 8;
4868 }
4869
4870 #if CONFIG_MACF
4871 error = mac_vnode_check_access(ctx, vp, uflags);
4872 if (error)
4873 return (error);
4874 #endif /* MAC */
4875
4876 /* action == 0 means only check for existence */
4877 if (action != 0) {
4878 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4879 } else {
4880 error = 0;
4881 }
4882
4883 return(error);
4884 }
4885
4886
4887
/*
 * access_extended: Check access permissions in bulk.
 *
 * Description:	uap->entries		Pointer to an array of accessx
 *					descriptor structs, plus one or
 *					more NULL terminated strings (see
 *					"Notes" section below).
 *		uap->size		Size of the area pointed to by
 *					uap->entries.
 *		uap->results		Pointer to the results array.
 *
 * Returns:	0			Success
 *		ENOMEM			Insufficient memory
 *		EINVAL			Invalid arguments
 *		namei:EFAULT		Bad address
 *		namei:ENAMETOOLONG	Filename too long
 *		namei:ENOENT		No such file or directory
 *		namei:ELOOP		Too many levels of symbolic links
 *		namei:EBADF		Bad file descriptor
 *		namei:ENOTDIR		Not a directory
 *		namei:???
 *		access1:
 *
 * Implicit returns:
 *		uap->results		Array contents modified
 *
 * Notes:	The uap->entries are structured as an arbitrary length array
 *		of accessx descriptors, followed by one or more NULL terminated
 *		strings
 *
 *			struct accessx_descriptor[0]
 *			...
 *			struct accessx_descriptor[n]
 *			char name_data[0];
 *
 *		We determine the entry count by walking the buffer containing
 *		the uap->entries argument descriptor.  For each descriptor we
 *		see, the valid values for the offset ad_name_offset will be
 *		in the byte range:
 *
 *			[ uap->entries + sizeof(struct accessx_descriptor) ]
 *						to
 *				[ uap->entries + uap->size - 2 ]
 *
 *		since we must have at least one string, and the string must
 *		be at least one character plus the NULL terminator in length.
 *
 * XXX:		Need to support the check-as uid argument
 */
int
access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
{
	struct accessx_descriptor *input = NULL;
	errno_t *result = NULL;
	errno_t error = 0;
	int wantdelete = 0;
	unsigned int desc_max, desc_actual, i, j;
	struct vfs_context context;
	struct nameidata nd;
	int niopts;
	vnode_t vp = NULL;
	vnode_t dvp = NULL;
#define ACCESSX_MAX_DESCR_ON_STACK 10
	struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];

	/* NULL cred marker so the 'out' path knows whether to unref. */
	context.vc_ucred = NULL;

	/*
	 * Validate parameters; if valid, copy the descriptor array and string
	 * arguments into local memory.  Before proceeding, the following
	 * conditions must have been met:
	 *
	 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
	 * o There must be sufficient room in the request for at least one
	 *   descriptor and a one byte NUL terminated string.
	 * o The allocation of local storage must not fail.
	 */
	if (uap->size > ACCESSX_MAX_TABLESIZE)
		return(ENOMEM);
	if (uap->size < (sizeof(struct accessx_descriptor) + 2))
		return(EINVAL);
	/* Small requests are served from the stack; larger ones allocate. */
	if (uap->size <= sizeof (stack_input)) {
		input = stack_input;
	} else {
		MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
		if (input == NULL) {
			error = ENOMEM;
			goto out;
		}
	}
	error = copyin(uap->entries, input, uap->size);
	if (error)
		goto out;

	AUDIT_ARG(opaque, input, uap->size);

	/*
	 * Force NUL termination of the copyin buffer to avoid namei() running
	 * off the end.  If the caller passes us bogus data, they may get a
	 * bogus result.
	 */
	((char *)input)[uap->size - 1] = 0;

	/*
	 * Access is defined as checking against the process' real identity,
	 * even if operations are checking the effective identity.  This
	 * requires that we use a local vfs context.
	 */
	context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	context.vc_thread = current_thread();

	/*
	 * Find out how many entries we have, so we can allocate the result
	 * array by walking the list and adjusting the count downward by the
	 * earliest string offset we see.
	 */
	desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
	desc_actual = desc_max;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * Take the offset to the name string for this entry and
		 * convert to an input array index, which would be one off
		 * the end of the array if this entry was the lowest-addressed
		 * name string.
		 */
		j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);

		/*
		 * An offset greater than the max allowable offset is an error.
		 * It is also an error for any valid entry to point
		 * to a location prior to the end of the current entry, if
		 * it's not a reference to the string of the previous entry.
		 */
		if (j > desc_max || (j != 0 && j <= i)) {
			error = EINVAL;
			goto out;
		}

		/*
		 * An offset of 0 means use the previous descriptor's offset;
		 * this is used to chain multiple requests for the same file
		 * to avoid multiple lookups.
		 */
		if (j == 0) {
			/* This is not valid for the first entry */
			if (i == 0) {
				error = EINVAL;
				goto out;
			}
			continue;
		}

		/*
		 * If the offset of the string for this descriptor is before
		 * what we believe is the current actual last descriptor,
		 * then we need to adjust our estimate downward; this permits
		 * the string table following the last descriptor to be out
		 * of order relative to the descriptor list.
		 */
		if (j < desc_actual)
			desc_actual = j;
	}

	/*
	 * We limit the actual number of descriptors we are willing to process
	 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
	 * requested does not exceed this limit,
	 */
	if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
		error = ENOMEM;
		goto out;
	}
	MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
	if (result == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Do the work by iterating over the descriptor entries we know to
	 * at least appear to contain valid data.
	 */
	error = 0;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * If the ad_name_offset is 0, then we use the previous
		 * results to make the check; otherwise, we are looking up
		 * a new file name.
		 */
		if (input[i].ad_name_offset != 0) {
			/* discard old vnodes */
			if (vp) {
				vnode_put(vp);
				vp = NULL;
			}
			if (dvp) {
				vnode_put(dvp);
				dvp = NULL;
			}

			/*
			 * Scan forward in the descriptor list to see if we
			 * need the parent vnode.  We will need it if we are
			 * deleting, since we must have rights to remove
			 * entries in the parent directory, as well as the
			 * rights to delete the object itself.
			 */
			wantdelete = input[i].ad_flags & _DELETE_OK;
			for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
				if (input[j].ad_flags & _DELETE_OK)
					wantdelete = 1;

			niopts = FOLLOW | AUDITVNPATH1;

			/* need parent for vnode_authorize for deletion test */
			if (wantdelete)
				niopts |= WANTPARENT;

			/* do the lookup */
			NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
			    CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
			    &context);
			error = namei(&nd);
			if (!error) {
				vp = nd.ni_vp;
				if (wantdelete)
					dvp = nd.ni_dvp;
			}
			nameidone(&nd);
		}

		/*
		 * Handle lookup errors.  "Expected" errors are recorded
		 * per-entry; anything else aborts the whole call.
		 */
		switch(error) {
		case ENOENT:
		case EACCES:
		case EPERM:
		case ENOTDIR:
			result[i] = error;
			break;
		case 0:
			/* run this access check */
			result[i] = access1(vp, dvp, input[i].ad_flags, &context);
			break;
		default:
			/* fatal lookup error */

			goto out;
		}
	}

	AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);

	/* copy out results */
	error = copyout(result, uap->results, desc_actual * sizeof(errno_t));

out:
	if (input && input != stack_input)
		FREE(input, M_TEMP);
	if (result)
		FREE(result, M_TEMP);
	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);
	if (IS_VALID_CRED(context.vc_ucred))
		kauth_cred_unref(&context.vc_ucred);
	return(error);
}
5158
5159
/*
 * Common implementation of access(2)/faccessat(2).
 *
 * Returns:	0			Success
 *		namei:EFAULT		Bad address
 *		namei:ENAMETOOLONG	Filename too long
 *		namei:ENOENT		No such file or directory
 *		namei:ELOOP		Too many levels of symbolic links
 *		namei:EBADF		Bad file descriptor
 *		namei:ENOTDIR		Not a directory
 *		namei:???
 *		access1:
 */
static int
faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
    int flag, enum uio_seg segflg)
{
	int error;
	struct nameidata nd;
	int niopts;
	struct vfs_context context;
#if NAMEDRSRCFORK
	int is_namedstream = 0;
#endif

	/*
	 * Unless the AT_EACCESS option is used, Access is defined as checking
	 * against the process' real identity, even if operations are checking
	 * the effective identity.  So we need to tweak the credential
	 * in the context for that case.
	 *
	 * Note: only the real-identity branch takes a cred reference; the
	 * 'out' path must mirror this when deciding to unref.
	 */
	if (!(flag & AT_EACCESS))
		context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	else
		context.vc_ucred = ctx->vc_ucred;
	context.vc_thread = ctx->vc_thread;


	niopts = FOLLOW | AUDITVNPATH1;
	/* need parent for vnode_authorize for deletion test */
	if (amode & _DELETE_OK)
		niopts |= WANTPARENT;
	NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
	    path, &context);

#if NAMEDRSRCFORK
	/* access(F_OK) calls are allowed for resource forks. */
	if (amode == F_OK)
		nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
	error = nameiat(&nd, fd);
	if (error)
		goto out;

#if NAMEDRSRCFORK
	/* Grab reference on the shadow stream file vnode to
	 * force an inactive on release which will mark it
	 * for recycle.
	 */
	if (vnode_isnamedstream(nd.ni_vp) &&
	    (nd.ni_vp->v_parent != NULLVP) &&
	    vnode_isshadow(nd.ni_vp)) {
		is_namedstream = 1;
		vnode_ref(nd.ni_vp);
	}
#endif

	error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);

#if NAMEDRSRCFORK
	if (is_namedstream) {
		vnode_rele(nd.ni_vp);
	}
#endif

	vnode_put(nd.ni_vp);
	/* nd.ni_dvp holds an iocount only when WANTPARENT was requested */
	if (amode & _DELETE_OK)
		vnode_put(nd.ni_dvp);
	nameidone(&nd);

out:
	if (!(flag & AT_EACCESS))
		kauth_cred_unref(&context.vc_ucred);
	return (error);
}
5243
5244 int
5245 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5246 {
5247 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5248 uap->path, uap->flags, 0, UIO_USERSPACE));
5249 }
5250
5251 int
5252 faccessat(__unused proc_t p, struct faccessat_args *uap,
5253 __unused int32_t *retval)
5254 {
5255 if (uap->flag & ~AT_EACCESS)
5256 return (EINVAL);
5257
5258 return (faccessat_internal(vfs_context_current(), uap->fd,
5259 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5260 }
5261
5262 /*
5263 * Returns: 0 Success
5264 * EFAULT
5265 * copyout:EFAULT
5266 * namei:???
5267 * vn_stat:???
5268 */
5269 static int
5270 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5271 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5272 enum uio_seg segflg, int fd, int flag)
5273 {
5274 struct nameidata nd;
5275 int follow;
5276 union {
5277 struct stat sb;
5278 struct stat64 sb64;
5279 } source;
5280 union {
5281 struct user64_stat user64_sb;
5282 struct user32_stat user32_sb;
5283 struct user64_stat64 user64_sb64;
5284 struct user32_stat64 user32_sb64;
5285 } dest;
5286 caddr_t sbp;
5287 int error, my_size;
5288 kauth_filesec_t fsec;
5289 size_t xsecurity_bufsize;
5290 void * statptr;
5291
5292 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5293 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5294 segflg, path, ctx);
5295
5296 #if NAMEDRSRCFORK
5297 int is_namedstream = 0;
5298 /* stat calls are allowed for resource forks. */
5299 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5300 #endif
5301 error = nameiat(&nd, fd);
5302 if (error)
5303 return (error);
5304 fsec = KAUTH_FILESEC_NONE;
5305
5306 statptr = (void *)&source;
5307
5308 #if NAMEDRSRCFORK
5309 /* Grab reference on the shadow stream file vnode to
5310 * force an inactive on release which will mark it
5311 * for recycle.
5312 */
5313 if (vnode_isnamedstream(nd.ni_vp) &&
5314 (nd.ni_vp->v_parent != NULLVP) &&
5315 vnode_isshadow(nd.ni_vp)) {
5316 is_namedstream = 1;
5317 vnode_ref(nd.ni_vp);
5318 }
5319 #endif
5320
5321 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5322
5323 #if NAMEDRSRCFORK
5324 if (is_namedstream) {
5325 vnode_rele(nd.ni_vp);
5326 }
5327 #endif
5328 vnode_put(nd.ni_vp);
5329 nameidone(&nd);
5330
5331 if (error)
5332 return (error);
5333 /* Zap spare fields */
5334 if (isstat64 != 0) {
5335 source.sb64.st_lspare = 0;
5336 source.sb64.st_qspare[0] = 0LL;
5337 source.sb64.st_qspare[1] = 0LL;
5338 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5339 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5340 my_size = sizeof(dest.user64_sb64);
5341 sbp = (caddr_t)&dest.user64_sb64;
5342 } else {
5343 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5344 my_size = sizeof(dest.user32_sb64);
5345 sbp = (caddr_t)&dest.user32_sb64;
5346 }
5347 /*
5348 * Check if we raced (post lookup) against the last unlink of a file.
5349 */
5350 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5351 source.sb64.st_nlink = 1;
5352 }
5353 } else {
5354 source.sb.st_lspare = 0;
5355 source.sb.st_qspare[0] = 0LL;
5356 source.sb.st_qspare[1] = 0LL;
5357 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5358 munge_user64_stat(&source.sb, &dest.user64_sb);
5359 my_size = sizeof(dest.user64_sb);
5360 sbp = (caddr_t)&dest.user64_sb;
5361 } else {
5362 munge_user32_stat(&source.sb, &dest.user32_sb);
5363 my_size = sizeof(dest.user32_sb);
5364 sbp = (caddr_t)&dest.user32_sb;
5365 }
5366
5367 /*
5368 * Check if we raced (post lookup) against the last unlink of a file.
5369 */
5370 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5371 source.sb.st_nlink = 1;
5372 }
5373 }
5374 if ((error = copyout(sbp, ub, my_size)) != 0)
5375 goto out;
5376
5377 /* caller wants extended security information? */
5378 if (xsecurity != USER_ADDR_NULL) {
5379
5380 /* did we get any? */
5381 if (fsec == KAUTH_FILESEC_NONE) {
5382 if (susize(xsecurity_size, 0) != 0) {
5383 error = EFAULT;
5384 goto out;
5385 }
5386 } else {
5387 /* find the user buffer size */
5388 xsecurity_bufsize = fusize(xsecurity_size);
5389
5390 /* copy out the actual data size */
5391 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5392 error = EFAULT;
5393 goto out;
5394 }
5395
5396 /* if the caller supplied enough room, copy out to it */
5397 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5398 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5399 }
5400 }
5401 out:
5402 if (fsec != KAUTH_FILESEC_NONE)
5403 kauth_filesec_free(fsec);
5404 return (error);
5405 }
5406
5407 /*
5408 * stat_extended: Get file status; with extended security (ACL).
5409 *
5410 * Parameters: p (ignored)
5411 * uap User argument descriptor (see below)
5412 * retval (ignored)
5413 *
5414 * Indirect: uap->path Path of file to get status from
5415 * uap->ub User buffer (holds file status info)
5416 * uap->xsecurity ACL to get (extended security)
5417 * uap->xsecurity_size Size of ACL
5418 *
5419 * Returns: 0 Success
5420 * !0 errno value
5421 *
5422 */
5423 int
5424 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5425 __unused int32_t *retval)
5426 {
5427 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5428 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5429 0));
5430 }
5431
5432 /*
5433 * Returns: 0 Success
5434 * fstatat_internal:??? [see fstatat_internal() in this file]
5435 */
5436 int
5437 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5438 {
5439 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5440 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5441 }
5442
5443 int
5444 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5445 {
5446 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5447 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5448 }
5449
5450 /*
5451 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5452 *
5453 * Parameters: p (ignored)
5454 * uap User argument descriptor (see below)
5455 * retval (ignored)
5456 *
5457 * Indirect: uap->path Path of file to get status from
5458 * uap->ub User buffer (holds file status info)
5459 * uap->xsecurity ACL to get (extended security)
5460 * uap->xsecurity_size Size of ACL
5461 *
5462 * Returns: 0 Success
5463 * !0 errno value
5464 *
5465 */
5466 int
5467 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5468 {
5469 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5470 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5471 0));
5472 }
5473
5474 /*
5475 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5476 *
5477 * Parameters: p (ignored)
5478 * uap User argument descriptor (see below)
5479 * retval (ignored)
5480 *
5481 * Indirect: uap->path Path of file to get status from
5482 * uap->ub User buffer (holds file status info)
5483 * uap->xsecurity ACL to get (extended security)
5484 * uap->xsecurity_size Size of ACL
5485 *
5486 * Returns: 0 Success
5487 * !0 errno value
5488 *
5489 */
5490 int
5491 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5492 {
5493 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5494 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5495 AT_SYMLINK_NOFOLLOW));
5496 }
5497
5498 /*
5499 * Get file status; this version does not follow links.
5500 */
5501 int
5502 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5503 {
5504 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5505 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5506 }
5507
5508 int
5509 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5510 {
5511 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5512 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5513 }
5514
5515 /*
5516 * lstat64_extended: Get file status; can handle large inode numbers; does not
5517 * follow links; with extended security (ACL).
5518 *
5519 * Parameters: p (ignored)
5520 * uap User argument descriptor (see below)
5521 * retval (ignored)
5522 *
5523 * Indirect: uap->path Path of file to get status from
5524 * uap->ub User buffer (holds file status info)
5525 * uap->xsecurity ACL to get (extended security)
5526 * uap->xsecurity_size Size of ACL
5527 *
5528 * Returns: 0 Success
5529 * !0 errno value
5530 *
5531 */
5532 int
5533 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5534 {
5535 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5536 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5537 AT_SYMLINK_NOFOLLOW));
5538 }
5539
5540 int
5541 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5542 {
5543 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5544 return (EINVAL);
5545
5546 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5547 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5548 }
5549
5550 int
5551 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5552 __unused int32_t *retval)
5553 {
5554 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5555 return (EINVAL);
5556
5557 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5558 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5559 }
5560
5561 /*
5562 * Get configurable pathname variables.
5563 *
5564 * Returns: 0 Success
5565 * namei:???
5566 * vn_pathconf:???
5567 *
5568 * Notes: Global implementation constants are intended to be
5569 * implemented in this function directly; all other constants
5570 * are per-FS implementation, and therefore must be handled in
5571 * each respective FS, instead.
5572 *
5573 * XXX We implement some things globally right now that should actually be
5574 * XXX per-FS; we will need to deal with this at some point.
5575 */
5576 /* ARGSUSED */
5577 int
5578 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5579 {
5580 int error;
5581 struct nameidata nd;
5582 vfs_context_t ctx = vfs_context_current();
5583
5584 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5585 UIO_USERSPACE, uap->path, ctx);
5586 error = namei(&nd);
5587 if (error)
5588 return (error);
5589
5590 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5591
5592 vnode_put(nd.ni_vp);
5593 nameidone(&nd);
5594 return (error);
5595 }
5596
5597 /*
5598 * Return target name of a symbolic link.
5599 */
5600 /* ARGSUSED */
5601 static int
5602 readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5603 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5604 int *retval)
5605 {
5606 vnode_t vp;
5607 uio_t auio;
5608 int error;
5609 struct nameidata nd;
5610 char uio_buf[ UIO_SIZEOF(1) ];
5611
5612 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5613 seg, path, ctx);
5614
5615 error = nameiat(&nd, fd);
5616 if (error)
5617 return (error);
5618 vp = nd.ni_vp;
5619
5620 nameidone(&nd);
5621
5622 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5623 &uio_buf[0], sizeof(uio_buf));
5624 uio_addiov(auio, buf, bufsize);
5625 if (vp->v_type != VLNK) {
5626 error = EINVAL;
5627 } else {
5628 #if CONFIG_MACF
5629 error = mac_vnode_check_readlink(ctx, vp);
5630 #endif
5631 if (error == 0)
5632 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5633 ctx);
5634 if (error == 0)
5635 error = VNOP_READLINK(vp, auio, ctx);
5636 }
5637 vnode_put(vp);
5638
5639 *retval = bufsize - (int)uio_resid(auio);
5640 return (error);
5641 }
5642
5643 int
5644 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5645 {
5646 enum uio_seg procseg;
5647
5648 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5649 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5650 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5651 uap->count, procseg, retval));
5652 }
5653
5654 int
5655 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5656 {
5657 enum uio_seg procseg;
5658
5659 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5660 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5661 procseg, uap->buf, uap->bufsize, procseg, retval));
5662 }
5663
5664 /*
5665 * Change file flags.
5666 */
5667 static int
5668 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5669 {
5670 struct vnode_attr va;
5671 kauth_action_t action;
5672 int error;
5673
5674 VATTR_INIT(&va);
5675 VATTR_SET(&va, va_flags, flags);
5676
5677 #if CONFIG_MACF
5678 error = mac_vnode_check_setflags(ctx, vp, flags);
5679 if (error)
5680 goto out;
5681 #endif
5682
5683 /* request authorisation, disregard immutability */
5684 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5685 goto out;
5686 /*
5687 * Request that the auth layer disregard those file flags it's allowed to when
5688 * authorizing this operation; we need to do this in order to be able to
5689 * clear immutable flags.
5690 */
5691 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5692 goto out;
5693 error = vnode_setattr(vp, &va, ctx);
5694
5695 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5696 error = ENOTSUP;
5697 }
5698 out:
5699 vnode_put(vp);
5700 return(error);
5701 }
5702
5703 /*
5704 * Change flags of a file given a path name.
5705 */
5706 /* ARGSUSED */
5707 int
5708 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5709 {
5710 vnode_t vp;
5711 vfs_context_t ctx = vfs_context_current();
5712 int error;
5713 struct nameidata nd;
5714
5715 AUDIT_ARG(fflags, uap->flags);
5716 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5717 UIO_USERSPACE, uap->path, ctx);
5718 error = namei(&nd);
5719 if (error)
5720 return (error);
5721 vp = nd.ni_vp;
5722 nameidone(&nd);
5723
5724 error = chflags1(vp, uap->flags, ctx);
5725
5726 return(error);
5727 }
5728
5729 /*
5730 * Change flags of a file given a file descriptor.
5731 */
5732 /* ARGSUSED */
5733 int
5734 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5735 {
5736 vnode_t vp;
5737 int error;
5738
5739 AUDIT_ARG(fd, uap->fd);
5740 AUDIT_ARG(fflags, uap->flags);
5741 if ( (error = file_vnode(uap->fd, &vp)) )
5742 return (error);
5743
5744 if ((error = vnode_getwithref(vp))) {
5745 file_drop(uap->fd);
5746 return(error);
5747 }
5748
5749 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5750
5751 error = chflags1(vp, uap->flags, vfs_context_current());
5752
5753 file_drop(uap->fd);
5754 return (error);
5755 }
5756
5757 /*
5758 * Change security information on a filesystem object.
5759 *
5760 * Returns: 0 Success
5761 * EPERM Operation not permitted
5762 * vnode_authattr:??? [anything vnode_authattr can return]
5763 * vnode_authorize:??? [anything vnode_authorize can return]
5764 * vnode_setattr:??? [anything vnode_setattr can return]
5765 *
5766 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5767 * translated to EPERM before being returned.
5768 */
5769 static int
5770 chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5771 {
5772 kauth_action_t action;
5773 int error;
5774
5775 AUDIT_ARG(mode, vap->va_mode);
5776 /* XXX audit new args */
5777
5778 #if NAMEDSTREAMS
5779 /* chmod calls are not allowed for resource forks. */
5780 if (vp->v_flag & VISNAMEDSTREAM) {
5781 return (EPERM);
5782 }
5783 #endif
5784
5785 #if CONFIG_MACF
5786 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5787 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
5788 return (error);
5789 #endif
5790
5791 /* make sure that the caller is allowed to set this security information */
5792 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5793 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5794 if (error == EACCES)
5795 error = EPERM;
5796 return(error);
5797 }
5798
5799 error = vnode_setattr(vp, vap, ctx);
5800
5801 return (error);
5802 }
5803
5804
5805 /*
5806 * Change mode of a file given a path name.
5807 *
5808 * Returns: 0 Success
5809 * namei:??? [anything namei can return]
5810 * chmod_vnode:??? [anything chmod_vnode can return]
5811 */
5812 static int
5813 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5814 int fd, int flag, enum uio_seg segflg)
5815 {
5816 struct nameidata nd;
5817 int follow, error;
5818
5819 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5820 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5821 segflg, path, ctx);
5822 if ((error = nameiat(&nd, fd)))
5823 return (error);
5824 error = chmod_vnode(ctx, nd.ni_vp, vap);
5825 vnode_put(nd.ni_vp);
5826 nameidone(&nd);
5827 return(error);
5828 }
5829
5830 /*
5831 * chmod_extended: Change the mode of a file given a path name; with extended
5832 * argument list (including extended security (ACL)).
5833 *
5834 * Parameters: p Process requesting the open
5835 * uap User argument descriptor (see below)
5836 * retval (ignored)
5837 *
5838 * Indirect: uap->path Path to object (same as 'chmod')
5839 * uap->uid UID to set
5840 * uap->gid GID to set
5841 * uap->mode File mode to set (same as 'chmod')
5842 * uap->xsecurity ACL to set (or delete)
5843 *
5844 * Returns: 0 Success
5845 * !0 errno value
5846 *
5847 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5848 *
* XXX: We should enumerate the possible errno values here, and where
5850 * in the code they originated.
5851 */
int
chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;	/* ACL copied in from user space, if any */

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/*
	 * Only mark the attributes the caller actually asked to change:
	 * -1 / KAUTH_UID_NONE / KAUTH_GID_NONE mean "leave unchanged".
	 */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	xsecdst = NULL;
	switch(uap->xsecurity) {
	/* explicit remove request */
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case USER_ADDR_NULL:
		break;
	default:
		/* copy the caller-supplied filesec in; freed after the call */
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
		KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
	}

	error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
	    UIO_USERSPACE);

	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);
	return(error);
}
5892
5893 /*
5894 * Returns: 0 Success
5895 * chmodat:??? [anything chmodat can return]
5896 */
5897 static int
5898 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
5899 int flag, enum uio_seg segflg)
5900 {
5901 struct vnode_attr va;
5902
5903 VATTR_INIT(&va);
5904 VATTR_SET(&va, va_mode, mode & ALLPERMS);
5905
5906 return (chmodat(ctx, path, &va, fd, flag, segflg));
5907 }
5908
5909 int
5910 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5911 {
5912 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5913 AT_FDCWD, 0, UIO_USERSPACE));
5914 }
5915
5916 int
5917 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
5918 {
5919 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5920 return (EINVAL);
5921
5922 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5923 uap->fd, uap->flag, UIO_USERSPACE));
5924 }
5925
5926 /*
5927 * Change mode of a file given a file descriptor.
5928 */
5929 static int
5930 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
5931 {
5932 vnode_t vp;
5933 int error;
5934
5935 AUDIT_ARG(fd, fd);
5936
5937 if ((error = file_vnode(fd, &vp)) != 0)
5938 return (error);
5939 if ((error = vnode_getwithref(vp)) != 0) {
5940 file_drop(fd);
5941 return(error);
5942 }
5943 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5944
5945 error = chmod_vnode(vfs_context_current(), vp, vap);
5946 (void)vnode_put(vp);
5947 file_drop(fd);
5948
5949 return (error);
5950 }
5951
5952 /*
5953 * fchmod_extended: Change mode of a file given a file descriptor; with
5954 * extended argument list (including extended security (ACL)).
5955 *
5956 * Parameters: p Process requesting to change file mode
5957 * uap User argument descriptor (see below)
5958 * retval (ignored)
5959 *
5960 * Indirect: uap->mode File mode to set (same as 'chmod')
5961 * uap->uid UID to set
5962 * uap->gid GID to set
5963 * uap->xsecurity ACL to set (or delete)
5964 * uap->fd File descriptor of file to change mode
5965 *
5966 * Returns: 0 Success
5967 * !0 errno value
5968 *
5969 */
5970 int
5971 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
5972 {
5973 int error;
5974 struct vnode_attr va;
5975 kauth_filesec_t xsecdst;
5976
5977 AUDIT_ARG(owner, uap->uid, uap->gid);
5978
5979 VATTR_INIT(&va);
5980 if (uap->mode != -1)
5981 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5982 if (uap->uid != KAUTH_UID_NONE)
5983 VATTR_SET(&va, va_uid, uap->uid);
5984 if (uap->gid != KAUTH_GID_NONE)
5985 VATTR_SET(&va, va_gid, uap->gid);
5986
5987 xsecdst = NULL;
5988 switch(uap->xsecurity) {
5989 case USER_ADDR_NULL:
5990 VATTR_SET(&va, va_acl, NULL);
5991 break;
5992 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5993 VATTR_SET(&va, va_acl, NULL);
5994 break;
5995 /* not being set */
5996 case CAST_USER_ADDR_T(-1):
5997 break;
5998 default:
5999 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6000 return(error);
6001 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6002 }
6003
6004 error = fchmod1(p, uap->fd, &va);
6005
6006
6007 switch(uap->xsecurity) {
6008 case USER_ADDR_NULL:
6009 case CAST_USER_ADDR_T(-1):
6010 break;
6011 default:
6012 if (xsecdst != NULL)
6013 kauth_filesec_free(xsecdst);
6014 }
6015 return(error);
6016 }
6017
6018 int
6019 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
6020 {
6021 struct vnode_attr va;
6022
6023 VATTR_INIT(&va);
6024 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6025
6026 return(fchmod1(p, uap->fd, &va));
6027 }
6028
6029
6030 /*
6031 * Set ownership given a path name.
6032 */
6033 /* ARGSUSED */
/*
 * fchownat_internal: common path-based ownership change used by
 * chown(2), lchown(2) and fchownat(2).
 *
 * Parameters:	ctx	caller's VFS context
 *		fd	directory fd for relative lookups (AT_FDCWD for cwd)
 *		path	pathname (address space given by segflg)
 *		uid	new owner, or (uid_t)VNOVAL to leave unchanged
 *		gid	new group, or (gid_t)VNOVAL to leave unchanged
 *		flag	AT_SYMLINK_NOFOLLOW honored; callers validate the rest
 *		segflg	segment (user/kernel) containing 'path'
 *
 * Returns:	0 on success, otherwise an errno; EACCES arising after the
 *		lookup is rewritten to EPERM (see comment at 'out:').
 */
static int
fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
    gid_t gid, int flag, enum uio_seg segflg)
{
	vnode_t vp;
	struct vnode_attr va;
	int error;
	struct nameidata nd;
	int follow;
	kauth_action_t action;

	AUDIT_ARG(owner, uid, gid);

	/* AT_SYMLINK_NOFOLLOW means operate on the symlink itself */
	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
	    path, ctx);
	error = nameiat(&nd, fd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* only request the changes the caller actually asked for */
	VATTR_INIT(&va);
	if (uid != (uid_t)VNOVAL)
		VATTR_SET(&va, va_uid, uid);
	if (gid != (gid_t)VNOVAL)
		VATTR_SET(&va, va_gid, gid);

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uid, gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

out:
	/*
	 * EACCES is only allowed from namei(); permissions failure should
	 * return EPERM, so we need to translate the error code.
	 */
	if (error == EACCES)
		error = EPERM;

	vnode_put(vp);
	return (error);
}
6087
6088 int
6089 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6090 {
6091 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6092 uap->uid, uap->gid, 0, UIO_USERSPACE));
6093 }
6094
6095 int
6096 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6097 {
6098 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6099 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6100 }
6101
6102 int
6103 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6104 {
6105 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6106 return (EINVAL);
6107
6108 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6109 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6110 }
6111
6112 /*
6113 * Set ownership given a file descriptor.
6114 */
6115 /* ARGSUSED */
/*
 * fchown: change owner and/or group of the file behind an open fd.
 * uid/gid values of VNOVAL mean "leave that field unchanged".
 */
int
fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	vnode_t vp;
	int error;
	kauth_action_t action;

	AUDIT_ARG(owner, uap->uid, uap->gid);
	AUDIT_ARG(fd, uap->fd);

	/* resolve the fd to a vnode and take an iocount on it */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* only request the changes the caller actually asked for */
	VATTR_INIT(&va);
	if (uap->uid != VNOVAL)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != VNOVAL)
		VATTR_SET(&va, va_gid, uap->gid);

#if NAMEDSTREAMS
	/* chown calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		/* permission failures are reported as EPERM, not EACCES */
		if (error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

out:
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6172
6173 static int
6174 getutimes(user_addr_t usrtvp, struct timespec *tsp)
6175 {
6176 int error;
6177
6178 if (usrtvp == USER_ADDR_NULL) {
6179 struct timeval old_tv;
6180 /* XXX Y2038 bug because of microtime argument */
6181 microtime(&old_tv);
6182 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
6183 tsp[1] = tsp[0];
6184 } else {
6185 if (IS_64BIT_PROCESS(current_proc())) {
6186 struct user64_timeval tv[2];
6187 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6188 if (error)
6189 return (error);
6190 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6191 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6192 } else {
6193 struct user32_timeval tv[2];
6194 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6195 if (error)
6196 return (error);
6197 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6198 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6199 }
6200 }
6201 return 0;
6202 }
6203
/*
 * setutimes: apply access time ts[0] and modification time ts[1] to 'vp'.
 *
 * 'nullflag' is non-zero when the caller supplied no explicit times
 * ("set to now").  It is recorded in va_vaflags as VA_UTIMES_NULL —
 * presumably so lower layers can apply the relaxed utimes(NULL)
 * permission semantics; confirm against vnode_authattr — and it also
 * suppresses the EACCES->EPERM rewrite on authorization failure.
 */
static int
setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
    int nullflag)
{
	int error;
	struct vnode_attr va;
	kauth_action_t action;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	VATTR_INIT(&va);
	VATTR_SET(&va, va_access_time, ts[0]);
	VATTR_SET(&va, va_modify_time, ts[1]);
	if (nullflag)
		va.va_vaflags |= VA_UTIMES_NULL;

#if NAMEDSTREAMS
	/* utimes calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
	if (error)
		goto out;
#endif
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
		/* explicit-times permission failures report EPERM */
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}

	/* since we may not need to auth anything, check here */
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

out:
	return error;
}
6250
6251 /*
6252 * Set the access and modification times of a file.
6253 */
6254 /* ARGSUSED */
int
utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
{
	struct timespec ts[2];
	user_addr_t usrtvp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	/*
	 * AUDIT: Needed to change the order of operations to do the
	 * name lookup first because auditing wants the path.
	 */
	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	nameidone(&nd);

	/*
	 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
	 * the current time instead.
	 */
	usrtvp = uap->tptr;
	if ((error = getutimes(usrtvp, ts)) != 0)
		goto out;

	/* final argument flags the utimes(path, NULL) case for setutimes() */
	error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);

out:
	/* namei() succeeded, so we always hold an iocount to release */
	vnode_put(nd.ni_vp);
	return (error);
}
6289
6290 /*
6291 * Set the access and modification times of a file.
6292 */
6293 /* ARGSUSED */
6294 int
6295 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6296 {
6297 struct timespec ts[2];
6298 vnode_t vp;
6299 user_addr_t usrtvp;
6300 int error;
6301
6302 AUDIT_ARG(fd, uap->fd);
6303 usrtvp = uap->tptr;
6304 if ((error = getutimes(usrtvp, ts)) != 0)
6305 return (error);
6306 if ((error = file_vnode(uap->fd, &vp)) != 0)
6307 return (error);
6308 if((error = vnode_getwithref(vp))) {
6309 file_drop(uap->fd);
6310 return(error);
6311 }
6312
6313 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6314 vnode_put(vp);
6315 file_drop(uap->fd);
6316 return(error);
6317 }
6318
6319 /*
6320 * Truncate a file given its path name.
6321 */
6322 /* ARGSUSED */
int
truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	kauth_action_t action;

	/* a negative length is never valid */
	if (uap->length < 0)
		return(EINVAL);
	NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	if ((error = namei(&nd)))
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* truncation is expressed as a data-size attribute change */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);

#if CONFIG_MACF
	/* path-based call: no file credential available, hence NOCRED */
	error = mac_vnode_check_truncate(ctx, NOCRED, vp);
	if (error)
		goto out;
#endif

	/* preflight and authorize the size change */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);
out:
	vnode_put(vp);
	return (error);
}
6361
6362 /*
6363 * Truncate a file given a file descriptor.
6364 */
6365 /* ARGSUSED */
int
ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();
	struct vnode_attr va;
	vnode_t vp;
	struct fileproc *fp;
	int error ;
	int fd = uap->fd;

	AUDIT_ARG(fd, uap->fd);
	/* a negative length is never valid */
	if (uap->length < 0)
		return(EINVAL);

	if ( (error = fp_lookup(p,fd,&fp,0)) ) {
		return(error);
	}

	/* POSIX shared memory objects are handled by the pshm layer */
	switch (FILEGLOB_DTYPE(fp->f_fglob)) {
	case DTYPE_PSXSHM:
		error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
		goto out;
	case DTYPE_VNODE:
		break;
	default:
		error = EINVAL;
		goto out;
	}

	vp = (vnode_t)fp->f_fglob->fg_data;

	/* the descriptor must have been opened for writing */
	if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EINVAL;
		goto out;
	}

	if ((error = vnode_getwithref(vp)) != 0) {
		goto out;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

#if CONFIG_MACF
	/* fd-based call: check against the credential the file was opened with */
	error = mac_vnode_check_truncate(ctx,
	    fp->f_fglob->fg_cred, vp);
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}
#endif
	/* truncation is expressed as a data-size attribute change */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);
	error = vnode_setattr(vp, &va, ctx);
	(void)vnode_put(vp);
out:
	file_drop(fd);
	return (error);
}
6425
6426
6427 /*
6428 * Sync an open file with synchronized I/O _file_ integrity completion
6429 */
6430 /* ARGSUSED */
6431 int
6432 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6433 {
6434 __pthread_testcancel(1);
6435 return(fsync_common(p, uap, MNT_WAIT));
6436 }
6437
6438
6439 /*
6440 * Sync an open file with synchronized I/O _file_ integrity completion
6441 *
6442 * Notes: This is a legacy support function that does not test for
6443 * thread cancellation points.
6444 */
6445 /* ARGSUSED */
6446 int
6447 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6448 {
6449 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6450 }
6451
6452
6453 /*
6454 * Sync an open file with synchronized I/O _data_ integrity completion
6455 */
6456 /* ARGSUSED */
6457 int
6458 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6459 {
6460 __pthread_testcancel(1);
6461 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6462 }
6463
6464
6465 /*
6466 * fsync_common
6467 *
6468 * Common fsync code to support both synchronized I/O file integrity completion
6469 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6470 *
6471 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6472 * will only guarantee that the file data contents are retrievable. If
* 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6474 * includes additional metadata unnecessary for retrieving the file data
6475 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6476 * storage.
6477 *
6478 * Parameters: p The process
6479 * uap->fd The descriptor to synchronize
6480 * flags The data integrity flags
6481 *
6482 * Returns: int Success
6483 * fp_getfvp:EBADF Bad file descriptor
6484 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6485 * VNOP_FSYNC:??? unspecified
6486 *
6487 * Notes: We use struct fsync_args because it is a short name, and all
6488 * caller argument structures are otherwise identical.
6489 */
static int
fsync_common(proc_t p, struct fsync_args *uap, int flags)
{
	vnode_t vp;
	struct fileproc *fp;
	vfs_context_t ctx = vfs_context_current();
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* resolve the fd to its fileproc and vnode, taking references */
	if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
		return (error);
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* flags is MNT_WAIT (file integrity) or MNT_DWAIT (data integrity) */
	error = VNOP_FSYNC(vp, flags, ctx);

#if NAMEDRSRCFORK
	/* Sync resource fork shadow file if necessary. */
	if ((error == 0) &&
	    (vp->v_flag & VISNAMEDSTREAM) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp) &&
	    (fp->f_flags & FP_WRITTEN)) {
		(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
	}
#endif

	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6526
6527 /*
6528 * Duplicate files. Source must be a file, target must be a file or
6529 * must not exist.
6530 *
6531 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6532 * perform inheritance correctly.
6533 */
6534 /* ARGSUSED */
int
copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
{
	vnode_t tvp, fvp, tdvp, sdvp;
	struct nameidata fromnd, tond;
	int error;
	vfs_context_t ctx = vfs_context_current();

	/* Check that the flags are valid. */

	if (uap->flags & ~CPF_MASK) {
		return(EINVAL);
	}

	/* look up the source; SAVESTART keeps ni_startdir for cleanup */
	NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
	    UIO_USERSPACE, uap->from, ctx);
	if ((error = namei(&fromnd)))
		return (error);
	fvp = fromnd.ni_vp;

	/* look up (or prepare to create) the destination */
	NDINIT(&tond, CREATE, OP_LINK,
	    LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
	    UIO_USERSPACE, uap->to, ctx);
	if ((error = namei(&tond))) {
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	/* an existing target is only acceptable with CPF_OVERWRITE */
	if (tvp != NULL) {
		if (!(uap->flags & CPF_OVERWRITE)) {
			error = EEXIST;
			goto out;
		}
	}
	/* directories cannot be copied this way */
	if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
		error = EISDIR;
		goto out;
	}

	/* caller must be allowed to add an entry to the target directory */
	if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	/* copying a file onto its own parent directory makes no sense */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do.
	 * (fixed to have POSIX semantics - CSM 3/2/98)
	 */
	if (fvp == tvp)
		error = -1;	/* internal sentinel; mapped to success below */
	if (!error)
		error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
out:
	sdvp = tond.ni_startdir;
	/*
	 * nameidone has to happen before we vnode_put(tdvp)
	 * since it may need to release the fs_nodelock on the tdvp
	 */
	nameidone(&tond);

	if (tvp)
		vnode_put(tvp);
	vnode_put(tdvp);
	vnode_put(sdvp);
out1:
	vnode_put(fvp);

	if (fromnd.ni_startdir)
		vnode_put(fromnd.ni_startdir);
	nameidone(&fromnd);

	/* the fvp == tvp "nothing to do" case reports success */
	if (error == -1)
		return (0);
	return (error);
}
6612
6613
6614 /*
6615 * Rename files. Source and destination must either both be directories,
6616 * or both not be directories. If target is a directory, it must be empty.
6617 */
6618 /* ARGSUSED */
6619 static int
6620 renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
6621 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
6622 {
6623 vnode_t tvp, tdvp;
6624 vnode_t fvp, fdvp;
6625 struct nameidata *fromnd, *tond;
6626 int error;
6627 int do_retry;
6628 int retry_count;
6629 int mntrename;
6630 int need_event;
6631 const char *oname = NULL;
6632 char *from_name = NULL, *to_name = NULL;
6633 int from_len=0, to_len=0;
6634 int holding_mntlock;
6635 mount_t locked_mp = NULL;
6636 vnode_t oparent = NULLVP;
6637 #if CONFIG_FSE
6638 fse_info from_finfo, to_finfo;
6639 #endif
6640 int from_truncated=0, to_truncated;
6641 int batched = 0;
6642 struct vnode_attr *fvap, *tvap;
6643 int continuing = 0;
6644 /* carving out a chunk for structs that are too big to be on stack. */
6645 struct {
6646 struct nameidata from_node, to_node;
6647 struct vnode_attr fv_attr, tv_attr;
6648 } * __rename_data;
6649 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
6650 fromnd = &__rename_data->from_node;
6651 tond = &__rename_data->to_node;
6652
6653 holding_mntlock = 0;
6654 do_retry = 0;
6655 retry_count = 0;
6656 retry:
6657 fvp = tvp = NULL;
6658 fdvp = tdvp = NULL;
6659 fvap = tvap = NULL;
6660 mntrename = FALSE;
6661
6662 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
6663 segflg, from, ctx);
6664 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
6665
6666 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6667 segflg, to, ctx);
6668 tond->ni_flag = NAMEI_COMPOUNDRENAME;
6669
6670 continue_lookup:
6671 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6672 if ( (error = nameiat(fromnd, fromfd)) )
6673 goto out1;
6674 fdvp = fromnd->ni_dvp;
6675 fvp = fromnd->ni_vp;
6676
6677 if (fvp && fvp->v_type == VDIR)
6678 tond->ni_cnd.cn_flags |= WILLBEDIR;
6679 }
6680
6681 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6682 if ( (error = nameiat(tond, tofd)) ) {
6683 /*
6684 * Translate error code for rename("dir1", "dir2/.").
6685 */
6686 if (error == EISDIR && fvp->v_type == VDIR)
6687 error = EINVAL;
6688 goto out1;
6689 }
6690 tdvp = tond->ni_dvp;
6691 tvp = tond->ni_vp;
6692 }
6693
6694 batched = vnode_compound_rename_available(fdvp);
6695 if (!fvp) {
6696 /*
6697 * Claim: this check will never reject a valid rename.
6698 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6699 * Suppose fdvp and tdvp are not on the same mount.
6700 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6701 * then you can't move it to within another dir on the same mountpoint.
6702 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6703 *
6704 * If this check passes, then we are safe to pass these vnodes to the same FS.
6705 */
6706 if (fdvp->v_mount != tdvp->v_mount) {
6707 error = EXDEV;
6708 goto out1;
6709 }
6710 goto skipped_lookup;
6711 }
6712
6713 if (!batched) {
6714 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6715 if (error) {
6716 if (error == ENOENT &&
6717 retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6718 /*
6719 * We encountered a race where after doing the namei, tvp stops
6720 * being valid. If so, simply re-drive the rename call from the
6721 * top.
6722 */
6723 do_retry = 1;
6724 retry_count += 1;
6725 }
6726 goto out1;
6727 }
6728 }
6729
6730 /*
6731 * If the source and destination are the same (i.e. they're
6732 * links to the same vnode) and the target file system is
6733 * case sensitive, then there is nothing to do.
6734 *
6735 * XXX Come back to this.
6736 */
6737 if (fvp == tvp) {
6738 int pathconf_val;
6739
6740 /*
6741 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6742 * then assume that this file system is case sensitive.
6743 */
6744 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6745 pathconf_val != 0) {
6746 goto out1;
6747 }
6748 }
6749
6750 /*
6751 * Allow the renaming of mount points.
6752 * - target must not exist
6753 * - target must reside in the same directory as source
6754 * - union mounts cannot be renamed
6755 * - "/" cannot be renamed
6756 *
6757 * XXX Handle this in VFS after a continued lookup (if we missed
6758 * in the cache to start off)
6759 */
6760 if ((fvp->v_flag & VROOT) &&
6761 (fvp->v_type == VDIR) &&
6762 (tvp == NULL) &&
6763 (fvp->v_mountedhere == NULL) &&
6764 (fdvp == tdvp) &&
6765 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6766 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
6767 vnode_t coveredvp;
6768
6769 /* switch fvp to the covered vnode */
6770 coveredvp = fvp->v_mount->mnt_vnodecovered;
6771 if ( (vnode_getwithref(coveredvp)) ) {
6772 error = ENOENT;
6773 goto out1;
6774 }
6775 vnode_put(fvp);
6776
6777 fvp = coveredvp;
6778 mntrename = TRUE;
6779 }
6780 /*
6781 * Check for cross-device rename.
6782 */
6783 if ((fvp->v_mount != tdvp->v_mount) ||
6784 (tvp && (fvp->v_mount != tvp->v_mount))) {
6785 error = EXDEV;
6786 goto out1;
6787 }
6788
6789 /*
6790 * If source is the same as the destination (that is the
6791 * same inode number) then there is nothing to do...
6792 * EXCEPT if the underlying file system supports case
6793 * insensitivity and is case preserving. In this case
6794 * the file system needs to handle the special case of
6795 * getting the same vnode as target (fvp) and source (tvp).
6796 *
6797 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6798 * and _PC_CASE_PRESERVING can have this exception, and they need to
6799 * handle the special case of getting the same vnode as target and
6800 * source. NOTE: Then the target is unlocked going into vnop_rename,
6801 * so not to cause locking problems. There is a single reference on tvp.
6802 *
6803 * NOTE - that fvp == tvp also occurs if they are hard linked and
6804 * that correct behaviour then is just to return success without doing
6805 * anything.
6806 *
6807 * XXX filesystem should take care of this itself, perhaps...
6808 */
6809 if (fvp == tvp && fdvp == tdvp) {
6810 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6811 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6812 fromnd->ni_cnd.cn_namelen)) {
6813 goto out1;
6814 }
6815 }
6816
6817 if (holding_mntlock && fvp->v_mount != locked_mp) {
6818 /*
6819 * we're holding a reference and lock
6820 * on locked_mp, but it no longer matches
6821 * what we want to do... so drop our hold
6822 */
6823 mount_unlock_renames(locked_mp);
6824 mount_drop(locked_mp, 0);
6825 holding_mntlock = 0;
6826 }
6827 if (tdvp != fdvp && fvp->v_type == VDIR) {
6828 /*
6829 * serialize renames that re-shape
6830 * the tree... if holding_mntlock is
6831 * set, then we're ready to go...
6832 * otherwise we
6833 * first need to drop the iocounts
6834 * we picked up, second take the
6835 * lock to serialize the access,
6836 * then finally start the lookup
6837 * process over with the lock held
6838 */
6839 if (!holding_mntlock) {
6840 /*
6841 * need to grab a reference on
6842 * the mount point before we
6843 * drop all the iocounts... once
6844 * the iocounts are gone, the mount
6845 * could follow
6846 */
6847 locked_mp = fvp->v_mount;
6848 mount_ref(locked_mp, 0);
6849
6850 /*
6851 * nameidone has to happen before we vnode_put(tvp)
6852 * since it may need to release the fs_nodelock on the tvp
6853 */
6854 nameidone(tond);
6855
6856 if (tvp)
6857 vnode_put(tvp);
6858 vnode_put(tdvp);
6859
6860 /*
6861 * nameidone has to happen before we vnode_put(fdvp)
6862 * since it may need to release the fs_nodelock on the fvp
6863 */
6864 nameidone(fromnd);
6865
6866 vnode_put(fvp);
6867 vnode_put(fdvp);
6868
6869 mount_lock_renames(locked_mp);
6870 holding_mntlock = 1;
6871
6872 goto retry;
6873 }
6874 } else {
6875 /*
6876 * when we dropped the iocounts to take
6877 * the lock, we allowed the identity of
6878 * the various vnodes to change... if they did,
6879 * we may no longer be dealing with a rename
6880 * that reshapes the tree... once we're holding
6881 * the iocounts, the vnodes can't change type
6882 * so we're free to drop the lock at this point
6883 * and continue on
6884 */
6885 if (holding_mntlock) {
6886 mount_unlock_renames(locked_mp);
6887 mount_drop(locked_mp, 0);
6888 holding_mntlock = 0;
6889 }
6890 }
6891
6892 // save these off so we can later verify that fvp is the same
6893 oname = fvp->v_name;
6894 oparent = fvp->v_parent;
6895
6896 skipped_lookup:
6897 #if CONFIG_FSE
6898 need_event = need_fsevent(FSE_RENAME, fdvp);
6899 if (need_event) {
6900 if (fvp) {
6901 get_fse_info(fvp, &from_finfo, ctx);
6902 } else {
6903 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6904 if (error) {
6905 goto out1;
6906 }
6907
6908 fvap = &__rename_data->fv_attr;
6909 }
6910
6911 if (tvp) {
6912 get_fse_info(tvp, &to_finfo, ctx);
6913 } else if (batched) {
6914 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6915 if (error) {
6916 goto out1;
6917 }
6918
6919 tvap = &__rename_data->tv_attr;
6920 }
6921 }
6922 #else
6923 need_event = 0;
6924 #endif /* CONFIG_FSE */
6925
6926 if (need_event || kauth_authorize_fileop_has_listeners()) {
6927 if (from_name == NULL) {
6928 GET_PATH(from_name);
6929 if (from_name == NULL) {
6930 error = ENOMEM;
6931 goto out1;
6932 }
6933 }
6934
6935 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
6936
6937 if (to_name == NULL) {
6938 GET_PATH(to_name);
6939 if (to_name == NULL) {
6940 error = ENOMEM;
6941 goto out1;
6942 }
6943 }
6944
6945 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
6946 }
6947 #if CONFIG_SECLUDED_RENAME
6948 if (flags & VFS_SECLUDE_RENAME) {
6949 fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME;
6950 }
6951 #else
6952 #pragma unused(flags)
6953 #endif
6954 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6955 tdvp, &tvp, &tond->ni_cnd, tvap,
6956 0, ctx);
6957
6958 if (holding_mntlock) {
6959 /*
6960 * we can drop our serialization
6961 * lock now
6962 */
6963 mount_unlock_renames(locked_mp);
6964 mount_drop(locked_mp, 0);
6965 holding_mntlock = 0;
6966 }
6967 if (error) {
6968 if (error == EKEEPLOOKING) {
6969 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6970 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6971 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6972 }
6973 }
6974
6975 fromnd->ni_vp = fvp;
6976 tond->ni_vp = tvp;
6977
6978 goto continue_lookup;
6979 }
6980
6981 /*
6982 * We may encounter a race in the VNOP where the destination didn't
6983 * exist when we did the namei, but it does by the time we go and
6984 * try to create the entry. In this case, we should re-drive this rename
6985 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6986 * but other filesystems susceptible to this race could return it, too.
6987 */
6988 if (error == ERECYCLE) {
6989 do_retry = 1;
6990 }
6991
6992 /*
6993 * For compound VNOPs, the authorization callback may return
6994 * ENOENT in case of racing hardlink lookups hitting the name
6995 * cache, redrive the lookup.
6996 */
6997 if (batched && error == ENOENT &&
6998 retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6999 do_retry = 1;
7000 retry_count += 1;
7001 }
7002
7003 goto out1;
7004 }
7005
7006 /* call out to allow 3rd party notification of rename.
7007 * Ignore result of kauth_authorize_fileop call.
7008 */
7009 kauth_authorize_fileop(vfs_context_ucred(ctx),
7010 KAUTH_FILEOP_RENAME,
7011 (uintptr_t)from_name, (uintptr_t)to_name);
7012
7013 #if CONFIG_FSE
7014 if (from_name != NULL && to_name != NULL) {
7015 if (from_truncated || to_truncated) {
7016 // set it here since only the from_finfo gets reported up to user space
7017 from_finfo.mode |= FSE_TRUNCATED_PATH;
7018 }
7019
7020 if (tvap && tvp) {
7021 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
7022 }
7023 if (fvap) {
7024 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
7025 }
7026
7027 if (tvp) {
7028 add_fsevent(FSE_RENAME, ctx,
7029 FSE_ARG_STRING, from_len, from_name,
7030 FSE_ARG_FINFO, &from_finfo,
7031 FSE_ARG_STRING, to_len, to_name,
7032 FSE_ARG_FINFO, &to_finfo,
7033 FSE_ARG_DONE);
7034 } else {
7035 add_fsevent(FSE_RENAME, ctx,
7036 FSE_ARG_STRING, from_len, from_name,
7037 FSE_ARG_FINFO, &from_finfo,
7038 FSE_ARG_STRING, to_len, to_name,
7039 FSE_ARG_DONE);
7040 }
7041 }
7042 #endif /* CONFIG_FSE */
7043
7044 /*
7045 * update filesystem's mount point data
7046 */
7047 if (mntrename) {
7048 char *cp, *pathend, *mpname;
7049 char * tobuf;
7050 struct mount *mp;
7051 int maxlen;
7052 size_t len = 0;
7053
7054 mp = fvp->v_mountedhere;
7055
7056 if (vfs_busy(mp, LK_NOWAIT)) {
7057 error = EBUSY;
7058 goto out1;
7059 }
7060 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
7061
7062 if (UIO_SEG_IS_USER_SPACE(segflg))
7063 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7064 else
7065 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
7066 if (!error) {
7067 /* find current mount point prefix */
7068 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7069 for (cp = pathend; *cp != '\0'; ++cp) {
7070 if (*cp == '/')
7071 pathend = cp + 1;
7072 }
7073 /* find last component of target name */
7074 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7075 if (*cp == '/')
7076 mpname = cp + 1;
7077 }
7078 /* append name to prefix */
7079 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7080 bzero(pathend, maxlen);
7081 strlcpy(pathend, mpname, maxlen);
7082 }
7083 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7084
7085 vfs_unbusy(mp);
7086 }
7087 /*
7088 * fix up name & parent pointers. note that we first
7089 * check that fvp has the same name/parent pointers it
7090 * had before the rename call... this is a 'weak' check
7091 * at best...
7092 *
7093 * XXX oparent and oname may not be set in the compound vnop case
7094 */
7095 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
7096 int update_flags;
7097
7098 update_flags = VNODE_UPDATE_NAME;
7099
7100 if (fdvp != tdvp)
7101 update_flags |= VNODE_UPDATE_PARENT;
7102
7103 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
7104 }
7105 out1:
7106 if (to_name != NULL) {
7107 RELEASE_PATH(to_name);
7108 to_name = NULL;
7109 }
7110 if (from_name != NULL) {
7111 RELEASE_PATH(from_name);
7112 from_name = NULL;
7113 }
7114 if (holding_mntlock) {
7115 mount_unlock_renames(locked_mp);
7116 mount_drop(locked_mp, 0);
7117 holding_mntlock = 0;
7118 }
7119 if (tdvp) {
7120 /*
7121 * nameidone has to happen before we vnode_put(tdvp)
7122 * since it may need to release the fs_nodelock on the tdvp
7123 */
7124 nameidone(tond);
7125
7126 if (tvp)
7127 vnode_put(tvp);
7128 vnode_put(tdvp);
7129 }
7130 if (fdvp) {
7131 /*
7132 * nameidone has to happen before we vnode_put(fdvp)
7133 * since it may need to release the fs_nodelock on the fdvp
7134 */
7135 nameidone(fromnd);
7136
7137 if (fvp)
7138 vnode_put(fvp);
7139 vnode_put(fdvp);
7140 }
7141
7142 /*
7143 * If things changed after we did the namei, then we will re-drive
7144 * this rename call from the top.
7145 */
7146 if (do_retry) {
7147 do_retry = 0;
7148 goto retry;
7149 }
7150
7151 FREE(__rename_data, M_TEMP);
7152 return (error);
7153 }
7154
7155 int
7156 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7157 {
7158 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7159 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7160 }
7161
#if CONFIG_SECLUDED_RENAME
/*
 * rename_ext(2): rename with caller-supplied flags (e.g. VFS_SECLUDE_RENAME);
 * both paths resolved relative to the CWD.
 */
int rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();

	return renameat_internal(ctx, AT_FDCWD, uap->from, AT_FDCWD, uap->to,
	    UIO_USERSPACE, uap->flags);
}
#endif
7172
7173 int
7174 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7175 {
7176 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7177 uap->tofd, uap->to, UIO_USERSPACE, 0));
7178 }
7179
7180 /*
7181 * Make a directory file.
7182 *
7183 * Returns: 0 Success
7184 * EEXIST
7185 * namei:???
7186 * vnode_authorize:???
7187 * vn_create:???
7188 */
7189 /* ARGSUSED */
7190 static int
7191 mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7192 enum uio_seg segflg)
7193 {
7194 vnode_t vp, dvp;
7195 int error;
7196 int update_flags = 0;
7197 int batched;
7198 struct nameidata nd;
7199
7200 AUDIT_ARG(mode, vap->va_mode);
7201 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
7202 path, ctx);
7203 nd.ni_cnd.cn_flags |= WILLBEDIR;
7204 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7205
7206 continue_lookup:
7207 error = nameiat(&nd, fd);
7208 if (error)
7209 return (error);
7210 dvp = nd.ni_dvp;
7211 vp = nd.ni_vp;
7212
7213 if (vp != NULL) {
7214 error = EEXIST;
7215 goto out;
7216 }
7217
7218 batched = vnode_compound_mkdir_available(dvp);
7219
7220 VATTR_SET(vap, va_type, VDIR);
7221
7222 /*
7223 * XXX
7224 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7225 * only get EXISTS or EISDIR for existing path components, and not that it could see
7226 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7227 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7228 */
7229 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
7230 if (error == EACCES || error == EPERM) {
7231 int error2;
7232
7233 nameidone(&nd);
7234 vnode_put(dvp);
7235 dvp = NULLVP;
7236
7237 /*
7238 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7239 * rather than EACCESS if the target exists.
7240 */
7241 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7242 path, ctx);
7243 error2 = nameiat(&nd, fd);
7244 if (error2) {
7245 goto out;
7246 } else {
7247 vp = nd.ni_vp;
7248 error = EEXIST;
7249 goto out;
7250 }
7251 }
7252
7253 goto out;
7254 }
7255
7256 /*
7257 * make the directory
7258 */
7259 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
7260 if (error == EKEEPLOOKING) {
7261 nd.ni_vp = vp;
7262 goto continue_lookup;
7263 }
7264
7265 goto out;
7266 }
7267
7268 // Make sure the name & parent pointers are hooked up
7269 if (vp->v_name == NULL)
7270 update_flags |= VNODE_UPDATE_NAME;
7271 if (vp->v_parent == NULLVP)
7272 update_flags |= VNODE_UPDATE_PARENT;
7273
7274 if (update_flags)
7275 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
7276
7277 #if CONFIG_FSE
7278 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
7279 #endif
7280
7281 out:
7282 /*
7283 * nameidone has to happen before we vnode_put(dvp)
7284 * since it may need to release the fs_nodelock on the dvp
7285 */
7286 nameidone(&nd);
7287
7288 if (vp)
7289 vnode_put(vp);
7290 if (dvp)
7291 vnode_put(dvp);
7292
7293 return (error);
7294 }
7295
7296 /*
7297 * mkdir_extended: Create a directory; with extended security (ACL).
7298 *
7299 * Parameters: p Process requesting to create the directory
7300 * uap User argument descriptor (see below)
7301 * retval (ignored)
7302 *
7303 * Indirect: uap->path Path of directory to create
7304 * uap->mode Access permissions to set
7305 * uap->xsecurity ACL to set
7306 *
7307 * Returns: 0 Success
7308 * !0 Not success
7309 *
7310 */
7311 int
7312 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
7313 {
7314 int ciferror;
7315 kauth_filesec_t xsecdst;
7316 struct vnode_attr va;
7317
7318 AUDIT_ARG(owner, uap->uid, uap->gid);
7319
7320 xsecdst = NULL;
7321 if ((uap->xsecurity != USER_ADDR_NULL) &&
7322 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7323 return ciferror;
7324
7325 VATTR_INIT(&va);
7326 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7327 if (xsecdst != NULL)
7328 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7329
7330 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7331 UIO_USERSPACE);
7332 if (xsecdst != NULL)
7333 kauth_filesec_free(xsecdst);
7334 return ciferror;
7335 }
7336
7337 int
7338 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
7339 {
7340 struct vnode_attr va;
7341
7342 VATTR_INIT(&va);
7343 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7344
7345 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7346 UIO_USERSPACE));
7347 }
7348
7349 int
7350 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7351 {
7352 struct vnode_attr va;
7353
7354 VATTR_INIT(&va);
7355 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7356
7357 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7358 UIO_USERSPACE));
7359 }
7360
/*
 * Remove a directory: common implementation behind rmdir(2) and
 * unlinkat(2) with AT_REMOVEDIR.  Resolves 'dirpath' relative to
 * directory fd 'fd' and removes the directory, retrying when the
 * only obstruction is orphaned AppleDouble files.
 */
static int
rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
    enum uio_seg segflg)
{
	vnode_t vp, dvp;
	int error;
	struct nameidata nd;
	char *path = NULL;
	int len=0;
	int has_listeners = 0;
	int need_event = 0;
	int truncated = 0;
#if CONFIG_FSE
	struct vnode_attr va;
#endif /* CONFIG_FSE */
	struct vnode_attr *vap = NULL;
	int restart_count = 0;
	int batched;

	int restart_flag;

	/*
	 * This loop exists to restart rmdir in the unlikely case that two
	 * processes are simultaneously trying to remove the same directory
	 * containing orphaned appleDouble files.
	 */
	do {
		NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
		       segflg, dirpath, ctx);
		nd.ni_flag = NAMEI_COMPOUNDRMDIR;
continue_lookup:
		restart_flag = 0;
		vap = NULL;

		error = nameiat(&nd, fd);
		if (error)
			return (error);

		dvp = nd.ni_dvp;
		vp = nd.ni_vp;

		if (vp) {
			batched = vnode_compound_rmdir_available(vp);

			if (vp->v_flag & VROOT) {
				/*
				 * The root of a mounted filesystem cannot be deleted.
				 */
				error = EBUSY;
				goto out;
			}

			/*
			 * Removed a check here; we used to abort if vp's vid
			 * was not the same as what we'd seen the last time around.
			 * I do not think that check was valid, because if we retry
			 * and all dirents are gone, the directory could legitimately
			 * be recycled but still be present in a situation where we would
			 * have had permission to delete.  Therefore, we won't make
			 * an effort to preserve that check now that we may not have a
			 * vp here.
			 */

			if (!batched) {
				error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
				if (error) {
					/* Racing lookup may yield stale ENOENT; redrive a few times. */
					if (error == ENOENT &&
					    restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
						restart_flag = 1;
						restart_count += 1;
					}
					goto out;
				}
			}
		} else {
			/* No leaf vnode: only valid when the FS does compound rmdir. */
			batched = 1;

			if (!vnode_compound_rmdir_available(dvp)) {
				panic("No error, but no compound rmdir?");
			}
		}

#if CONFIG_FSE
		fse_info finfo;

		need_event = need_fsevent(FSE_DELETE, dvp);
		if (need_event) {
			if (!batched) {
				get_fse_info(vp, &finfo, ctx);
			} else {
				/* Compound case: ask the FS to fill notify attrs via vap. */
				error = vfs_get_notify_attributes(&va);
				if (error) {
					goto out;
				}

				vap = &va;
			}
		}
#endif
		has_listeners = kauth_authorize_fileop_has_listeners();
		if (need_event || has_listeners) {
			if (path == NULL) {
				GET_PATH(path);
				if (path == NULL) {
					error = ENOMEM;
					goto out;
				}
			}

			len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
#if CONFIG_FSE
			if (truncated) {
				finfo.mode |= FSE_TRUNCATED_PATH;
			}
#endif
		}

		error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		nd.ni_vp = vp;
		if (vp == NULLVP) {
			/* Couldn't find a vnode */
			goto out;
		}

		if (error == EKEEPLOOKING) {
			/* Compound VNOP wants us to continue the interrupted lookup. */
			goto continue_lookup;
		} else if (batched && error == ENOENT &&
		    restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
			/*
			 * For compound VNOPs, the authorization callback
			 * may return ENOENT in case of racing hard link lookups
			 * redrive the lookup.
			 */
			restart_flag = 1;
			restart_count += 1;
			goto out;
		}
#if CONFIG_APPLEDOUBLE
		/*
		 * Special case to remove orphaned AppleDouble
		 * files.  I don't like putting this in the kernel,
		 * but carbon does not like putting this in carbon either,
		 * so here we are.
		 */
		if (error == ENOTEMPTY) {
			error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
			if (error == EBUSY) {
				goto out;
			}


			/*
			 * Assuming everything went well, we will try the RMDIR again
			 */
			if (!error)
				error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		}
#endif /* CONFIG_APPLEDOUBLE */
		/*
		 * Call out to allow 3rd party notification of delete.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (!error) {
			if (has_listeners) {
				kauth_authorize_fileop(vfs_context_ucred(ctx),
				    KAUTH_FILEOP_DELETE,
				    (uintptr_t)vp,
				    (uintptr_t)path);
			}

			if (vp->v_flag & VISHARDLINK) {
				// see the comment in unlink1() about why we update
				// the parent of a hard link when it is removed
				vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
			}

#if CONFIG_FSE
			if (need_event) {
				if (vap) {
					vnode_get_fse_info_from_vap(vp, &finfo, vap);
				}
				add_fsevent(FSE_DELETE, ctx,
				    FSE_ARG_STRING, len, path,
				    FSE_ARG_FINFO, &finfo,
				    FSE_ARG_DONE);
			}
#endif
		}

out:
		if (path != NULL) {
			RELEASE_PATH(path);
			path = NULL;
		}
		/*
		 * nameidone has to happen before we vnode_put(dvp)
		 * since it may need to release the fs_nodelock on the dvp
		 */
		nameidone(&nd);
		vnode_put(dvp);

		if (vp)
			vnode_put(vp);

		if (restart_flag == 0) {
			/* Wake any peer waiting on this vp for the AppleDouble retry. */
			wakeup_one((caddr_t)vp);
			return (error);
		}
		/* Briefly sleep before retrying the whole removal. */
		tsleep(vp, PVFS, "rm AD", 1);

	} while (restart_flag != 0);

	return (error);

}
7576
7577 /*
7578 * Remove a directory file.
7579 */
7580 /* ARGSUSED */
7581 int
7582 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
7583 {
7584 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
7585 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
7586 }
7587
/* Get direntry length padded to 8 byte alignment */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Read directory entries from 'vp' into 'uio' in the extended (64-bit,
 * struct direntry) format.  If the filesystem natively supports
 * VNODE_READDIR_EXTENDED the request is passed straight through;
 * otherwise legacy struct dirent records are read into a kernel buffer
 * and repacked as struct direntry before being copied out.
 */
errno_t
vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
    int *numdirent, vfs_context_t ctxp)
{
	/* Check if fs natively supports VNODE_READDIR_EXTENDED */
	if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
	    ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
		return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
	} else {
		size_t bufsize;
		void * bufptr;
		uio_t auio;
		struct direntry *entry64;
		struct dirent *dep;
		int bytesread;
		int error;

		/*
		 * Our kernel buffer needs to be smaller since re-packing
		 * will expand each dirent.  The worse case (when the name
		 * length is 3) corresponds to a struct direntry size of 32
		 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
		 * (4-byte aligned).  So having a buffer that is 3/8 the size
		 * will prevent us from reading more than we can pack.
		 *
		 * Since this buffer is wired memory, we will limit the
		 * buffer size to a maximum of 32K. We would really like to
		 * use 32K in the MIN(), but we use magic number 87371 to
		 * prevent uio_resid() * 3 / 8 from overflowing.
		 */
		bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
		MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
		if (bufptr == NULL) {
			return ENOMEM;
		}

		/* Read the legacy dirents into the kernel buffer at uio's offset. */
		auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufptr, bufsize);
		auio->uio_offset = uio->uio_offset;

		error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);

		dep = (struct dirent *)bufptr;
		bytesread = bufsize - uio_resid(auio);

		/* Scratch record reused for each converted entry. */
		MALLOC(entry64, struct direntry *, sizeof(struct direntry),
		    M_TEMP, M_WAITOK);
		/*
		 * Convert all the entries and copy them out to user's buffer.
		 */
		while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
			size_t enbufsize = DIRENT64_LEN(dep->d_namlen);

			bzero(entry64, enbufsize);
			/* Convert a dirent to a dirent64. */
			entry64->d_ino = dep->d_ino;
			entry64->d_seekoff = 0;
			entry64->d_reclen = enbufsize;
			entry64->d_namlen = dep->d_namlen;
			entry64->d_type = dep->d_type;
			bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);

			/* Move to next entry. */
			dep = (struct dirent *)((char *)dep + dep->d_reclen);

			/* Copy entry64 to user's buffer. */
			error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
		}

		/* Update the real offset using the offset we got from VNOP_READDIR. */
		if (error == 0) {
			uio->uio_offset = auio->uio_offset;
		}
		uio_free(auio);
		FREE(bufptr, M_TEMP);
		FREE(entry64, M_TEMP);
		return (error);
	}
}
7671
/* Silently clamp over-large user buffers to this size (128 MB). */
#define GETDIRENTRIES_MAXBUFSIZE	(128 * 1024 * 1024U)

/*
 * Read a block of directory entries in a file system independent format.
 *
 * Shared by getdirentries(2) and getdirentries64(2).  Reads from the fd's
 * current offset into user buffer 'bufp' (at most 'bufsize' bytes), advances
 * the fd offset, and returns the starting offset in '*offset' and the byte
 * count in '*bytesread'.  When 'flags' has VNODE_READDIR_EXTENDED the
 * extended (struct direntry) format is produced via vnode_readdir64().
 */
static int
getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
    off_t *offset, int flags)
{
	vnode_t vp;
	struct vfs_context context = *vfs_context_current();	/* local copy */
	struct fileproc *fp;
	uio_t auio;
	int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	off_t loff;
	int error, eofflag, numdirent;
	char uio_buf[ UIO_SIZEOF(1) ];

	error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
	if (error) {
		return (error);
	}
	/* The descriptor must have been opened for reading. */
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}

	if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
		bufsize = GETDIRENTRIES_MAXBUFSIZE;

#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
	if (error)
		goto out;
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		goto out;
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

unionread:
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(&context, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* Read from the fd's current offset; keep fg_offset in sync afterwards. */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, bufp, bufsize);

	if (flags & VNODE_READDIR_EXTENDED) {
		error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	} else {
		error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	}
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}

	/*
	 * Nothing was read: for union mounts, descend to the covered
	 * directory and retry the read there.
	 */
	if ((user_ssize_t)bufsize == uio_resid(auio)){
		if (union_dircheckp) {
			error = union_dircheckp(&vp, fp, &context);
			if (error == -1)
				goto unionread;
			if (error)
				goto out;
		}

		if ((vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, &context) == 0) {
				/* Repoint the open file at the lower directory. */
				vnode_ref(vp);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0;
				vnode_rele(tvp);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	vnode_put(vp);
	if (offset) {
		*offset = loff;
	}

	*bytesread = bufsize - uio_resid(auio);
out:
	file_drop(fd);
	return (error);
}
7777
7778
7779 int
7780 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
7781 {
7782 off_t offset;
7783 ssize_t bytesread;
7784 int error;
7785
7786 AUDIT_ARG(fd, uap->fd);
7787 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7788
7789 if (error == 0) {
7790 if (proc_is64bit(p)) {
7791 user64_long_t base = (user64_long_t)offset;
7792 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7793 } else {
7794 user32_long_t base = (user32_long_t)offset;
7795 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7796 }
7797 *retval = bytesread;
7798 }
7799 return (error);
7800 }
7801
7802 int
7803 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7804 {
7805 off_t offset;
7806 ssize_t bytesread;
7807 int error;
7808
7809 AUDIT_ARG(fd, uap->fd);
7810 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7811
7812 if (error == 0) {
7813 *retval = bytesread;
7814 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7815 }
7816 return (error);
7817 }
7818
7819
7820 /*
7821 * Set the mode mask for creation of filesystem nodes.
7822 * XXX implement xsecurity
7823 */
7824 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7825 static int
7826 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
7827 {
7828 struct filedesc *fdp;
7829
7830 AUDIT_ARG(mask, newmask);
7831 proc_fdlock(p);
7832 fdp = p->p_fd;
7833 *retval = fdp->fd_cmask;
7834 fdp->fd_cmask = newmask & ALLPERMS;
7835 proc_fdunlock(p);
7836 return (0);
7837 }
7838
7839 /*
7840 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7841 *
7842 * Parameters: p Process requesting to set the umask
7843 * uap User argument descriptor (see below)
7844 * retval umask of the process (parameter p)
7845 *
7846 * Indirect: uap->newmask umask to set
7847 * uap->xsecurity ACL to set
7848 *
7849 * Returns: 0 Success
7850 * !0 Not success
7851 *
7852 */
7853 int
7854 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
7855 {
7856 int ciferror;
7857 kauth_filesec_t xsecdst;
7858
7859 xsecdst = KAUTH_FILESEC_NONE;
7860 if (uap->xsecurity != USER_ADDR_NULL) {
7861 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7862 return ciferror;
7863 } else {
7864 xsecdst = KAUTH_FILESEC_NONE;
7865 }
7866
7867 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7868
7869 if (xsecdst != KAUTH_FILESEC_NONE)
7870 kauth_filesec_free(xsecdst);
7871 return ciferror;
7872 }
7873
7874 int
7875 umask(proc_t p, struct umask_args *uap, int32_t *retval)
7876 {
7877 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7878 }
7879
7880 /*
7881 * Void all references to file by ripping underlying filesystem
7882 * away from vnode.
7883 */
7884 /* ARGSUSED */
7885 int
7886 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
7887 {
7888 vnode_t vp;
7889 struct vnode_attr va;
7890 vfs_context_t ctx = vfs_context_current();
7891 int error;
7892 struct nameidata nd;
7893
7894 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
7895 uap->path, ctx);
7896 error = namei(&nd);
7897 if (error)
7898 return (error);
7899 vp = nd.ni_vp;
7900
7901 nameidone(&nd);
7902
7903 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
7904 error = ENOTSUP;
7905 goto out;
7906 }
7907
7908 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
7909 error = EBUSY;
7910 goto out;
7911 }
7912
7913 #if CONFIG_MACF
7914 error = mac_vnode_check_revoke(ctx, vp);
7915 if (error)
7916 goto out;
7917 #endif
7918
7919 VATTR_INIT(&va);
7920 VATTR_WANTED(&va, va_uid);
7921 if ((error = vnode_getattr(vp, &va, ctx)))
7922 goto out;
7923 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
7924 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
7925 goto out;
7926 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
7927 VNOP_REVOKE(vp, REVOKEALL, ctx);
7928 out:
7929 vnode_put(vp);
7930 return (error);
7931 }
7932
7933
7934 /*
 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
7936 * The following system calls are designed to support features
7937 * which are specific to the HFS & HFS Plus volume formats
7938 */
7939
7940
7941 /*
7942 * Obtain attribute information on objects in a directory while enumerating
7943 * the directory.
7944 */
7945 /* ARGSUSED */
7946 int
7947 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
7948 {
7949 vnode_t vp;
7950 struct fileproc *fp;
7951 uio_t auio = NULL;
7952 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7953 uint32_t count, savecount;
7954 uint32_t newstate;
7955 int error, eofflag;
7956 uint32_t loff;
7957 struct attrlist attributelist;
7958 vfs_context_t ctx = vfs_context_current();
7959 int fd = uap->fd;
7960 char uio_buf[ UIO_SIZEOF(1) ];
7961 kauth_action_t action;
7962
7963 AUDIT_ARG(fd, fd);
7964
7965 /* Get the attributes into kernel space */
7966 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
7967 return(error);
7968 }
7969 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
7970 return(error);
7971 }
7972 savecount = count;
7973 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
7974 return (error);
7975 }
7976 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7977 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7978 error = EBADF;
7979 goto out;
7980 }
7981
7982
7983 #if CONFIG_MACF
7984 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
7985 fp->f_fglob);
7986 if (error)
7987 goto out;
7988 #endif
7989
7990
7991 if ( (error = vnode_getwithref(vp)) )
7992 goto out;
7993
7994 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7995
7996 unionread:
7997 if (vp->v_type != VDIR) {
7998 (void)vnode_put(vp);
7999 error = EINVAL;
8000 goto out;
8001 }
8002
8003 #if CONFIG_MACF
8004 error = mac_vnode_check_readdir(ctx, vp);
8005 if (error != 0) {
8006 (void)vnode_put(vp);
8007 goto out;
8008 }
8009 #endif /* MAC */
8010
8011 /* set up the uio structure which will contain the users return buffer */
8012 loff = fp->f_fglob->fg_offset;
8013 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8014 uio_addiov(auio, uap->buffer, uap->buffersize);
8015
8016 /*
8017 * If the only item requested is file names, we can let that past with
8018 * just LIST_DIRECTORY. If they want any other attributes, that means
8019 * they need SEARCH as well.
8020 */
8021 action = KAUTH_VNODE_LIST_DIRECTORY;
8022 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8023 attributelist.fileattr || attributelist.dirattr)
8024 action |= KAUTH_VNODE_SEARCH;
8025
8026 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
8027
8028 /* Believe it or not, uap->options only has 32-bits of valid
8029 * info, so truncate before extending again */
8030
8031 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8032 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8033 }
8034
8035 if (error) {
8036 (void) vnode_put(vp);
8037 goto out;
8038 }
8039
8040 /*
8041 * If we've got the last entry of a directory in a union mount
8042 * then reset the eofflag and pretend there's still more to come.
8043 * The next call will again set eofflag and the buffer will be empty,
8044 * so traverse to the underlying directory and do the directory
8045 * read there.
8046 */
8047 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8048 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8049 eofflag = 0;
8050 } else { // Empty buffer
8051 struct vnode *tvp = vp;
8052 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8053 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8054 fp->f_fglob->fg_data = (caddr_t) vp;
8055 fp->f_fglob->fg_offset = 0; // reset index for new dir
8056 count = savecount;
8057 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8058 vnode_put(tvp);
8059 goto unionread;
8060 }
8061 vp = tvp;
8062 }
8063 }
8064
8065 (void)vnode_put(vp);
8066
8067 if (error)
8068 goto out;
8069 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
8070
8071 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
8072 goto out;
8073 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
8074 goto out;
8075 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
8076 goto out;
8077
8078 *retval = eofflag; /* similar to getdirentries */
8079 error = 0;
8080 out:
8081 file_drop(fd);
8082 return (error); /* return error earlier, an retval of 0 or 1 now */
8083
8084 } /* end of getdirentriesattr system call */
8085
8086 /*
8087 * Exchange data between two files
8088 */
8089
8090 /* ARGSUSED */
8091 int
8092 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
8093 {
8094
8095 struct nameidata fnd, snd;
8096 vfs_context_t ctx = vfs_context_current();
8097 vnode_t fvp;
8098 vnode_t svp;
8099 int error;
8100 u_int32_t nameiflags;
8101 char *fpath = NULL;
8102 char *spath = NULL;
8103 int flen=0, slen=0;
8104 int from_truncated=0, to_truncated=0;
8105 #if CONFIG_FSE
8106 fse_info f_finfo, s_finfo;
8107 #endif
8108
8109 nameiflags = 0;
8110 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8111
8112 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8113 UIO_USERSPACE, uap->path1, ctx);
8114
8115 error = namei(&fnd);
8116 if (error)
8117 goto out2;
8118
8119 nameidone(&fnd);
8120 fvp = fnd.ni_vp;
8121
8122 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
8123 UIO_USERSPACE, uap->path2, ctx);
8124
8125 error = namei(&snd);
8126 if (error) {
8127 vnode_put(fvp);
8128 goto out2;
8129 }
8130 nameidone(&snd);
8131 svp = snd.ni_vp;
8132
8133 /*
8134 * if the files are the same, return an inval error
8135 */
8136 if (svp == fvp) {
8137 error = EINVAL;
8138 goto out;
8139 }
8140
8141 /*
8142 * if the files are on different volumes, return an error
8143 */
8144 if (svp->v_mount != fvp->v_mount) {
8145 error = EXDEV;
8146 goto out;
8147 }
8148
8149 /* If they're not files, return an error */
8150 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
8151 error = EINVAL;
8152 goto out;
8153 }
8154
8155 #if CONFIG_MACF
8156 error = mac_vnode_check_exchangedata(ctx,
8157 fvp, svp);
8158 if (error)
8159 goto out;
8160 #endif
8161 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8162 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
8163 goto out;
8164
8165 if (
8166 #if CONFIG_FSE
8167 need_fsevent(FSE_EXCHANGE, fvp) ||
8168 #endif
8169 kauth_authorize_fileop_has_listeners()) {
8170 GET_PATH(fpath);
8171 GET_PATH(spath);
8172 if (fpath == NULL || spath == NULL) {
8173 error = ENOMEM;
8174 goto out;
8175 }
8176
8177 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8178 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
8179
8180 #if CONFIG_FSE
8181 get_fse_info(fvp, &f_finfo, ctx);
8182 get_fse_info(svp, &s_finfo, ctx);
8183 if (from_truncated || to_truncated) {
8184 // set it here since only the f_finfo gets reported up to user space
8185 f_finfo.mode |= FSE_TRUNCATED_PATH;
8186 }
8187 #endif
8188 }
8189 /* Ok, make the call */
8190 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
8191
8192 if (error == 0) {
8193 const char *tmpname;
8194
8195 if (fpath != NULL && spath != NULL) {
8196 /* call out to allow 3rd party notification of exchangedata.
8197 * Ignore result of kauth_authorize_fileop call.
8198 */
8199 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
8200 (uintptr_t)fpath, (uintptr_t)spath);
8201 }
8202 name_cache_lock();
8203
8204 tmpname = fvp->v_name;
8205 fvp->v_name = svp->v_name;
8206 svp->v_name = tmpname;
8207
8208 if (fvp->v_parent != svp->v_parent) {
8209 vnode_t tmp;
8210
8211 tmp = fvp->v_parent;
8212 fvp->v_parent = svp->v_parent;
8213 svp->v_parent = tmp;
8214 }
8215 name_cache_unlock();
8216
8217 #if CONFIG_FSE
8218 if (fpath != NULL && spath != NULL) {
8219 add_fsevent(FSE_EXCHANGE, ctx,
8220 FSE_ARG_STRING, flen, fpath,
8221 FSE_ARG_FINFO, &f_finfo,
8222 FSE_ARG_STRING, slen, spath,
8223 FSE_ARG_FINFO, &s_finfo,
8224 FSE_ARG_DONE);
8225 }
8226 #endif
8227 }
8228
8229 out:
8230 if (fpath != NULL)
8231 RELEASE_PATH(fpath);
8232 if (spath != NULL)
8233 RELEASE_PATH(spath);
8234 vnode_put(svp);
8235 vnode_put(fvp);
8236 out2:
8237 return (error);
8238 }
8239
/*
 * Return (in MB) the amount of freespace on the given vnode's volume.
 */
uint32_t freespace_mb(vnode_t vp);

uint32_t
freespace_mb(vnode_t vp)
{
	/* Refresh the cached vfsstat so f_bavail/f_bsize are current;
	 * any error from the refresh is ignored (stale stats are used). */
	vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
	/* available-bytes >> 20 == megabytes; computed in 64 bits to avoid
	 * overflow, then truncated to the uint32_t return type. */
	return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
	        vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
}
8252
8253 #if CONFIG_SEARCHFS
8254
8255 /* ARGSUSED */
8256
8257 int
8258 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
8259 {
8260 vnode_t vp, tvp;
8261 int i, error=0;
8262 int fserror = 0;
8263 struct nameidata nd;
8264 struct user64_fssearchblock searchblock;
8265 struct searchstate *state;
8266 struct attrlist *returnattrs;
8267 struct timeval timelimit;
8268 void *searchparams1,*searchparams2;
8269 uio_t auio = NULL;
8270 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8271 uint32_t nummatches;
8272 int mallocsize;
8273 uint32_t nameiflags;
8274 vfs_context_t ctx = vfs_context_current();
8275 char uio_buf[ UIO_SIZEOF(1) ];
8276
8277 /* Start by copying in fsearchblock parameter list */
8278 if (IS_64BIT_PROCESS(p)) {
8279 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8280 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8281 timelimit.tv_usec = searchblock.timelimit.tv_usec;
8282 }
8283 else {
8284 struct user32_fssearchblock tmp_searchblock;
8285
8286 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8287 // munge into 64-bit version
8288 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8289 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8290 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8291 searchblock.maxmatches = tmp_searchblock.maxmatches;
8292 /*
8293 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8294 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8295 */
8296 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8297 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
8298 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8299 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8300 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8301 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8302 searchblock.searchattrs = tmp_searchblock.searchattrs;
8303 }
8304 if (error)
8305 return(error);
8306
8307 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8308 */
8309 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
8310 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8311 return(EINVAL);
8312
8313 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8314 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8315 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8316 /* block. */
8317 /* */
8318 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8319 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8320 /* assumes the size is still 556 bytes it will continue to work */
8321
8322 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
8323 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
8324
8325 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8326
8327 /* Now set up the various pointers to the correct place in our newly allocated memory */
8328
8329 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8330 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8331 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8332
8333 /* Now copy in the stuff given our local variables. */
8334
8335 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
8336 goto freeandexit;
8337
8338 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
8339 goto freeandexit;
8340
8341 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
8342 goto freeandexit;
8343
8344 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
8345 goto freeandexit;
8346
8347 /*
8348 * When searching a union mount, need to set the
8349 * start flag at the first call on each layer to
8350 * reset state for the new volume.
8351 */
8352 if (uap->options & SRCHFS_START)
8353 state->ss_union_layer = 0;
8354 else
8355 uap->options |= state->ss_union_flags;
8356 state->ss_union_flags = 0;
8357
8358 /*
8359 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8360 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8361 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8362 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8363 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8364 */
8365
8366 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8367 attrreference_t* string_ref;
8368 u_int32_t* start_length;
8369 user64_size_t param_length;
8370
8371 /* validate searchparams1 */
8372 param_length = searchblock.sizeofsearchparams1;
8373 /* skip the word that specifies length of the buffer */
8374 start_length= (u_int32_t*) searchparams1;
8375 start_length= start_length+1;
8376 string_ref= (attrreference_t*) start_length;
8377
8378 /* ensure no negative offsets or too big offsets */
8379 if (string_ref->attr_dataoffset < 0 ) {
8380 error = EINVAL;
8381 goto freeandexit;
8382 }
8383 if (string_ref->attr_length > MAXPATHLEN) {
8384 error = EINVAL;
8385 goto freeandexit;
8386 }
8387
8388 /* Check for pointer overflow in the string ref */
8389 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8390 error = EINVAL;
8391 goto freeandexit;
8392 }
8393
8394 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8395 error = EINVAL;
8396 goto freeandexit;
8397 }
8398 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8399 error = EINVAL;
8400 goto freeandexit;
8401 }
8402 }
8403
8404 /* set up the uio structure which will contain the users return buffer */
8405 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8406 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
8407
8408 nameiflags = 0;
8409 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8410 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8411 UIO_USERSPACE, uap->path, ctx);
8412
8413 error = namei(&nd);
8414 if (error)
8415 goto freeandexit;
8416 vp = nd.ni_vp;
8417 nameidone(&nd);
8418
8419 /*
8420 * Switch to the root vnode for the volume
8421 */
8422 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
8423 vnode_put(vp);
8424 if (error)
8425 goto freeandexit;
8426 vp = tvp;
8427
8428 /*
8429 * If it's a union mount, the path lookup takes
8430 * us to the top layer. But we may need to descend
8431 * to a lower layer. For non-union mounts the layer
8432 * is always zero.
8433 */
8434 for (i = 0; i < (int) state->ss_union_layer; i++) {
8435 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
8436 break;
8437 tvp = vp;
8438 vp = vp->v_mount->mnt_vnodecovered;
8439 if (vp == NULL) {
8440 vnode_put(tvp);
8441 error = ENOENT;
8442 goto freeandexit;
8443 }
8444 vnode_getwithref(vp);
8445 vnode_put(tvp);
8446 }
8447
8448 #if CONFIG_MACF
8449 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
8450 if (error) {
8451 vnode_put(vp);
8452 goto freeandexit;
8453 }
8454 #endif
8455
8456
8457 /*
8458 * If searchblock.maxmatches == 0, then skip the search. This has happened
8459 * before and sometimes the underlying code doesnt deal with it well.
8460 */
8461 if (searchblock.maxmatches == 0) {
8462 nummatches = 0;
8463 goto saveandexit;
8464 }
8465
8466 /*
8467 * Allright, we have everything we need, so lets make that call.
8468 *
8469 * We keep special track of the return value from the file system:
8470 * EAGAIN is an acceptable error condition that shouldn't keep us
8471 * from copying out any results...
8472 */
8473
8474 fserror = VNOP_SEARCHFS(vp,
8475 searchparams1,
8476 searchparams2,
8477 &searchblock.searchattrs,
8478 (u_long)searchblock.maxmatches,
8479 &timelimit,
8480 returnattrs,
8481 &nummatches,
8482 (u_long)uap->scriptcode,
8483 (u_long)uap->options,
8484 auio,
8485 (struct searchstate *) &state->ss_fsstate,
8486 ctx);
8487
8488 /*
8489 * If it's a union mount we need to be called again
8490 * to search the mounted-on filesystem.
8491 */
8492 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
8493 state->ss_union_flags = SRCHFS_START;
8494 state->ss_union_layer++; // search next layer down
8495 fserror = EAGAIN;
8496 }
8497
8498 saveandexit:
8499
8500 vnode_put(vp);
8501
8502 /* Now copy out the stuff that needs copying out. That means the number of matches, the
8503 search state. Everything was already put into he return buffer by the vop call. */
8504
8505 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
8506 goto freeandexit;
8507
8508 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
8509 goto freeandexit;
8510
8511 error = fserror;
8512
8513 freeandexit:
8514
8515 FREE(searchparams1,M_TEMP);
8516
8517 return(error);
8518
8519
8520 } /* end of searchfs system call */
8521
8522 #else /* CONFIG_SEARCHFS */
8523
int
searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
{
	/* searchfs support was not compiled in (CONFIG_SEARCHFS is off). */
	return (ENOTSUP);
}
8529
8530 #endif /* CONFIG_SEARCHFS */
8531
8532
/*
 * Namespace-handler ("nspace") state: lock bootstrap objects, the table of
 * pending events, and per-handler bookkeeping shared by the
 * resolve_nspace_item*() and wait_for_namespace_event() machinery below.
 */
lck_grp_attr_t * nspace_group_attr;
lck_attr_t * nspace_lock_attr;
lck_grp_t * nspace_mutex_group;

/* nspace_handler_lock guards nspace_items[] and the token/idx rendezvous;
 * nspace_handler_exclusion_lock serializes the handler_busy flag. */
lck_mtx_t nspace_handler_lock;
lck_mtx_t nspace_handler_exclusion_lock;

time_t snapshot_timestamp=0;
int nspace_allow_virtual_devs=0;	/* allow snapshot events on virtual (disk-image) devices */

void nspace_handler_init(void);

/* One pending namespace event, keyed by (vp, op). */
typedef struct nspace_item_info {
	struct vnode *vp;	/* vnode the event concerns; NULL once recycled */
	void *arg;		/* optional payload (a uio for extended events) */
	uint64_t op;		/* NAMESPACE_HANDLER_* operation bits */
	uint32_t vid;		/* vnode vid captured when the event was queued */
	uint32_t flags;		/* NSPACE_ITEM_* state bits; 0 == slot free */
	uint32_t token;		/* id handed to the user-space handler */
	uint32_t refcount;	/* number of threads waiting on this item */
} nspace_item_info;

#define MAX_NSPACE_ITEMS 128
nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
uint32_t nspace_token_id=0;
uint32_t nspace_handler_timeout = 15; // seconds

/* nspace_item_info.flags state bits */
#define NSPACE_ITEM_NEW 0x0001
#define NSPACE_ITEM_PROCESSING 0x0002
#define NSPACE_ITEM_DEAD 0x0004
#define NSPACE_ITEM_CANCELLED 0x0008
#define NSPACE_ITEM_DONE 0x0010
#define NSPACE_ITEM_RESET_TIMER 0x0020

/* nspace_item_info.flags event-type bits */
#define NSPACE_ITEM_NSPACE_EVENT 0x0040
#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080

#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)

//#pragma optimization_level 0

/* Which user-space handler services an event. */
typedef enum {
	NSPACE_HANDLER_NSPACE = 0,
	NSPACE_HANDLER_SNAPSHOT = 1,

	NSPACE_HANDLER_COUNT,
} nspace_type_t;

/* Identity of the registered user-space handler process. */
typedef struct {
	uint64_t handler_tid;
	struct proc *handler_proc;
	int handler_busy;
} nspace_handler_t;

nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];

/* namespace fsctl functions */
static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
static int nspace_item_flags_for_type(nspace_type_t nspace_type);
static int nspace_open_flags_for_type(nspace_type_t nspace_type);
static nspace_type_t nspace_type_for_op(uint64_t op);
static int nspace_is_special_process(struct proc *proc);
static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
static int validate_namespace_args (int is64bit, int size);
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
8600
8601
8602 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
8603 {
8604 switch(nspace_type) {
8605 case NSPACE_HANDLER_NSPACE:
8606 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
8607 case NSPACE_HANDLER_SNAPSHOT:
8608 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
8609 default:
8610 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
8611 return 0;
8612 }
8613 }
8614
8615 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
8616 {
8617 switch(nspace_type) {
8618 case NSPACE_HANDLER_NSPACE:
8619 return NSPACE_ITEM_NSPACE_EVENT;
8620 case NSPACE_HANDLER_SNAPSHOT:
8621 return NSPACE_ITEM_SNAPSHOT_EVENT;
8622 default:
8623 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
8624 return 0;
8625 }
8626 }
8627
8628 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
8629 {
8630 switch(nspace_type) {
8631 case NSPACE_HANDLER_NSPACE:
8632 return FREAD | FWRITE | O_EVTONLY;
8633 case NSPACE_HANDLER_SNAPSHOT:
8634 return FREAD | O_EVTONLY;
8635 default:
8636 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
8637 return 0;
8638 }
8639 }
8640
8641 static inline nspace_type_t nspace_type_for_op(uint64_t op)
8642 {
8643 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
8644 case NAMESPACE_HANDLER_NSPACE_EVENT:
8645 return NSPACE_HANDLER_NSPACE;
8646 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
8647 return NSPACE_HANDLER_SNAPSHOT;
8648 default:
8649 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
8650 return NSPACE_HANDLER_NSPACE;
8651 }
8652 }
8653
8654 static inline int nspace_is_special_process(struct proc *proc)
8655 {
8656 int i;
8657 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8658 if (proc == nspace_handlers[i].handler_proc)
8659 return 1;
8660 }
8661 return 0;
8662 }
8663
8664 void
8665 nspace_handler_init(void)
8666 {
8667 nspace_lock_attr = lck_attr_alloc_init();
8668 nspace_group_attr = lck_grp_attr_alloc_init();
8669 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
8670 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
8671 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
8672 memset(&nspace_items[0], 0, sizeof(nspace_items));
8673 }
8674
8675 void
8676 nspace_proc_exit(struct proc *p)
8677 {
8678 int i, event_mask = 0;
8679
8680 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8681 if (p == nspace_handlers[i].handler_proc) {
8682 event_mask |= nspace_item_flags_for_type(i);
8683 nspace_handlers[i].handler_tid = 0;
8684 nspace_handlers[i].handler_proc = NULL;
8685 }
8686 }
8687
8688 if (event_mask == 0) {
8689 return;
8690 }
8691
8692 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
8693 // if this process was the snapshot handler, zero snapshot_timeout
8694 snapshot_timestamp = 0;
8695 }
8696
8697 //
8698 // unblock anyone that's waiting for the handler that died
8699 //
8700 lck_mtx_lock(&nspace_handler_lock);
8701 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8702 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
8703
8704 if ( nspace_items[i].flags & event_mask ) {
8705
8706 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8707 vnode_lock_spin(nspace_items[i].vp);
8708 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8709 vnode_unlock(nspace_items[i].vp);
8710 }
8711 nspace_items[i].vp = NULL;
8712 nspace_items[i].vid = 0;
8713 nspace_items[i].flags = NSPACE_ITEM_DONE;
8714 nspace_items[i].token = 0;
8715
8716 wakeup((caddr_t)&(nspace_items[i].vp));
8717 }
8718 }
8719 }
8720
8721 wakeup((caddr_t)&nspace_item_idx);
8722 lck_mtx_unlock(&nspace_handler_lock);
8723 }
8724
8725
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	/* Convenience wrapper: same as the extended variant with no payload. */
	return resolve_nspace_item_ext(vp, op, NULL);
}
8731
int
resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
{
	/*
	 * Queue a namespace event for vp in nspace_items[] and block the
	 * calling thread until the user-space handler marks the item done,
	 * cancels it, or nspace_handler_timeout seconds elapse.
	 * Returns 0 on success (or when the event is suppressed) or errno.
	 */
	int i, error, keep_waiting;
	struct timespec ts;
	nspace_type_t nspace_type = nspace_type_for_op(op);

	// only allow namespace events on regular files, directories and symlinks.
	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
		return 0;
	}

	//
	// if this is a snapshot event and the vnode is on a
	// disk image just pretend nothing happened since any
	// change to the disk image will cause the disk image
	// itself to get backed up and this avoids multi-way
	// deadlocks between the snapshot handler and the ever
	// popular diskimages-helper process.  the variable
	// nspace_allow_virtual_devs allows this behavior to
	// be overridden (for use by the Mobile TimeMachine
	// testing infrastructure which uses disk images)
	//
	if (   (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
	    && (vp->v_mount != NULL)
	    && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
	    && !nspace_allow_virtual_devs) {

		return 0;
	}

	// if (thread_tid(current_thread()) == namespace_handler_tid) {
	/* No handler registered for this event type: nothing to wait for. */
	if (nspace_handlers[nspace_type].handler_proc == NULL) {
		return 0;
	}

	/* A handler process must never block on its own event queue. */
	if (nspace_is_special_process(current_proc())) {
		return EDEADLK;
	}

	lck_mtx_lock(&nspace_handler_lock);

retry:
	/* First check whether this (vp, op) pair is already queued. */
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
			break;
		}
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* Not queued yet: find a free slot (flags == 0 means unused). */
		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags == 0) {
				break;
			}
		}
	} else {
		/* Already queued: piggy-back on the existing item. */
		nspace_items[i].refcount++;
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* Table full: sleep (interruptibly, bounded) for a free slot. */
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;

		error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
		if (error == 0) {
			// an entry got free'd up, go see if we can get a slot
			goto retry;
		} else {
			lck_mtx_unlock(&nspace_handler_lock);
			return error;
		}
	}

	//
	// if it didn't already exist, add it.  if it did exist
	// we'll get woken up when someone does a wakeup() on
	// the slot in the nspace_items table.
	//
	if (vp != nspace_items[i].vp) {
		nspace_items[i].vp = vp;
		nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg;  // arg is {NULL, true, uio *} - only pass uio thru to the user
		nspace_items[i].op = op;
		nspace_items[i].vid = vnode_vid(vp);
		nspace_items[i].flags = NSPACE_ITEM_NEW;
		nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
		if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
			if (arg) {
				vnode_lock_spin(vp);
				vp->v_flag |= VNEEDSSNAPSHOT;
				vnode_unlock(vp);
			}
		}

		nspace_items[i].token = 0;
		nspace_items[i].refcount = 1;

		/* Wake a handler parked in wait_for_namespace_event(). */
		wakeup((caddr_t)&nspace_item_idx);
	}

	//
	// Now go to sleep until the handler does a wakeup on this
	// slot in the nspace_items table (or we timeout).
	//
	keep_waiting = 1;
	while(keep_waiting) {
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;
		error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);

		if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
			error = 0;
		} else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
			/* On cancellation the token field carries the result code. */
			error = nspace_items[i].token;
		} else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
			if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
				/* Handler asked for more time: re-arm and wait again. */
				nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
				continue;
			} else {
				error = ETIMEDOUT;
			}
		} else if (error == 0) {
			// hmmm, why did we get woken up?
			printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
			       nspace_items[i].token);
		}

		/* Last waiter out recycles the slot for re-use. */
		if (--nspace_items[i].refcount == 0) {
			nspace_items[i].vp = NULL;     // clear this so that no one will match on it again
			nspace_items[i].arg = NULL;
			nspace_items[i].token = 0;     // clear this so that the handler will not find it anymore
			nspace_items[i].flags = 0;     // this clears it for re-use
		}
		wakeup(&nspace_token_id);
		keep_waiting = 0;
	}

	lck_mtx_unlock(&nspace_handler_lock);

	return error;
}
8872
8873
8874 int
8875 get_nspace_item_status(struct vnode *vp, int32_t *status)
8876 {
8877 int i;
8878
8879 lck_mtx_lock(&nspace_handler_lock);
8880 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8881 if (nspace_items[i].vp == vp) {
8882 break;
8883 }
8884 }
8885
8886 if (i >= MAX_NSPACE_ITEMS) {
8887 lck_mtx_unlock(&nspace_handler_lock);
8888 return ENOENT;
8889 }
8890
8891 *status = nspace_items[i].flags;
8892 lck_mtx_unlock(&nspace_handler_lock);
8893 return 0;
8894 }
8895
8896
#if 0
/* NOTE(review): dead code -- compiled out via "#if 0"; kept for reference. */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;
	int ret;

	/* Fetch the volume id and file id needed to form a /.vol path. */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
		/* Fall back to a recognizable bogus path; *len is set to the
		 * formatted length plus the NUL. */
		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
		ret = -1;
	} else {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		ret = 0;
	}

	return ret;
}
#endif
8919
//
// Note: this function does NOT check permissions on all of the
// parent directories leading to this vnode.  It should only be
// called on behalf of a root process.  Otherwise a process may
// get access to a file because the file itself is readable even
// though its parent directories would prevent access.
//
static int
vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
{
	/*
	 * Open an already-looked-up vnode with mode `fmode`: superuser
	 * check, MAC check, kauth authorization, VNOP_OPEN, then a
	 * usecount reference via vnode_ref_ext.  Returns 0 or errno.
	 */
	int error, action;

	/* Root only -- see the parent-directory caveat above. */
	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
		return error;
	}

#if CONFIG_MACF
	error = mac_vnode_check_open(ctx, vp, fmode);
	if (error)
		return error;
#endif

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD) {
		action |= KAUTH_VNODE_READ_DATA;
	}
	if (fmode & (FWRITE | O_TRUNC)) {
		/*
		 * If we are writing, appending, and not truncating,
		 * indicate that we are appending so that if the
		 * UF_APPEND or SF_APPEND bits are set, we do not deny
		 * the open.
		 */
		if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
			action |= KAUTH_VNODE_APPEND_DATA;
		} else {
			action |= KAUTH_VNODE_WRITE_DATA;
		}
	}

	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
		return error;


	//
	// if the vnode is tagged VOPENEVT and the current process
	// has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
	// flag to the open mode so that this open won't count against
	// the vnode when carbon delete() does a vnode_isinuse() to see
	// if a file is currently in use.  this allows spotlight
	// importers to not interfere with carbon apps that depend on
	// the no-delete-if-busy semantics of carbon delete().
	//
	if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
		fmode |= O_EVTONLY;
	}

	if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
		return error;
	}
	if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
		/* Could not take the reference: undo the open. */
		VNOP_CLOSE(vp, fmode, ctx);
		return error;
	}

	/* Call out to allow 3rd party notification of open.
	 * Ignore result of kauth_authorize_fileop call.
	 */
#if CONFIG_MACF
	mac_vnode_notify_open(ctx, vp, fmode);
#endif
	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
	    (uintptr_t)vp, 0);


	return 0;
}
8998
8999 static int
9000 wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
9001 {
9002 int i, error=0, unblock=0;
9003 task_t curtask;
9004
9005 lck_mtx_lock(&nspace_handler_exclusion_lock);
9006 if (nspace_handlers[nspace_type].handler_busy) {
9007 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9008 return EBUSY;
9009 }
9010 nspace_handlers[nspace_type].handler_busy = 1;
9011 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9012
9013 /*
9014 * Any process that gets here will be one of the namespace handlers.
9015 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9016 * as we can cause deadlocks to occur, because the namespace handler may prevent
9017 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
9018 * process.
9019 */
9020 curtask = current_task();
9021 bsd_set_dependency_capable (curtask);
9022
9023 lck_mtx_lock(&nspace_handler_lock);
9024 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9025 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9026 nspace_handlers[nspace_type].handler_proc = current_proc();
9027 }
9028
9029 while (error == 0) {
9030
9031 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9032 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9033 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9034 continue;
9035 }
9036 break;
9037 }
9038 }
9039
9040 if (i < MAX_NSPACE_ITEMS) {
9041 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9042 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9043 nspace_items[i].token = ++nspace_token_id;
9044
9045 if (nspace_items[i].vp) {
9046 struct fileproc *fp;
9047 int32_t indx, fmode;
9048 struct proc *p = current_proc();
9049 vfs_context_t ctx = vfs_context_current();
9050 struct vnode_attr va;
9051
9052
9053 /*
9054 * Use vnode pointer to acquire a file descriptor for
9055 * hand-off to userland
9056 */
9057 fmode = nspace_open_flags_for_type(nspace_type);
9058 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9059 if (error) {
9060 unblock = 1;
9061 break;
9062 }
9063 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9064 if (error) {
9065 unblock = 1;
9066 vnode_put(nspace_items[i].vp);
9067 break;
9068 }
9069
9070 if ((error = falloc(p, &fp, &indx, ctx))) {
9071 vn_close(nspace_items[i].vp, fmode, ctx);
9072 vnode_put(nspace_items[i].vp);
9073 unblock = 1;
9074 break;
9075 }
9076
9077 fp->f_fglob->fg_flag = fmode;
9078 fp->f_fglob->fg_ops = &vnops;
9079 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9080
9081 proc_fdlock(p);
9082 procfdtbl_releasefd(p, indx, NULL);
9083 fp_drop(p, indx, fp, 1);
9084 proc_fdunlock(p);
9085
9086 /*
9087 * All variants of the namespace handler struct support these three fields:
9088 * token, flags, and the FD pointer
9089 */
9090 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9091 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9092 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9093
9094 /*
9095 * Handle optional fields:
9096 * extended version support an info ptr (offset, length), and the
9097 *
9098 * namedata version supports a unique per-link object ID
9099 *
9100 */
9101 if (nhd->infoptr) {
9102 uio_t uio = (uio_t)nspace_items[i].arg;
9103 uint64_t u_offset, u_length;
9104
9105 if (uio) {
9106 u_offset = uio_offset(uio);
9107 u_length = uio_resid(uio);
9108 } else {
9109 u_offset = 0;
9110 u_length = 0;
9111 }
9112 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9113 error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
9114 }
9115
9116 if (nhd->objid) {
9117 VATTR_INIT(&va);
9118 VATTR_WANTED(&va, va_linkid);
9119 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9120 if (error == 0 ) {
9121 uint64_t linkid = 0;
9122 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9123 linkid = (uint64_t)va.va_linkid;
9124 }
9125 error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
9126 }
9127 }
9128
9129 if (error) {
9130 vn_close(nspace_items[i].vp, fmode, ctx);
9131 fp_free(p, indx, fp);
9132 unblock = 1;
9133 }
9134
9135 vnode_put(nspace_items[i].vp);
9136
9137 break;
9138 } else {
9139 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
9140 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
9141 }
9142
9143 } else {
9144 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9145 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9146 error = EINVAL;
9147 break;
9148 }
9149
9150 }
9151 }
9152
9153 if (unblock) {
9154 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9155 vnode_lock_spin(nspace_items[i].vp);
9156 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9157 vnode_unlock(nspace_items[i].vp);
9158 }
9159 nspace_items[i].vp = NULL;
9160 nspace_items[i].vid = 0;
9161 nspace_items[i].flags = NSPACE_ITEM_DONE;
9162 nspace_items[i].token = 0;
9163
9164 wakeup((caddr_t)&(nspace_items[i].vp));
9165 }
9166
9167 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9168 // just go through every snapshot event and unblock it immediately.
9169 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9170 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9171 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9172 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9173 nspace_items[i].vp = NULL;
9174 nspace_items[i].vid = 0;
9175 nspace_items[i].flags = NSPACE_ITEM_DONE;
9176 nspace_items[i].token = 0;
9177
9178 wakeup((caddr_t)&(nspace_items[i].vp));
9179 }
9180 }
9181 }
9182 }
9183 }
9184
9185 lck_mtx_unlock(&nspace_handler_lock);
9186
9187 lck_mtx_lock(&nspace_handler_exclusion_lock);
9188 nspace_handlers[nspace_type].handler_busy = 0;
9189 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9190
9191 return error;
9192 }
9193
9194 static inline int validate_namespace_args (int is64bit, int size) {
9195
9196 if (is64bit) {
9197 /* Must be one of these */
9198 if (size == sizeof(user64_namespace_handler_info)) {
9199 goto sizeok;
9200 }
9201 if (size == sizeof(user64_namespace_handler_info_ext)) {
9202 goto sizeok;
9203 }
9204 if (size == sizeof(user64_namespace_handler_data)) {
9205 goto sizeok;
9206 }
9207 return EINVAL;
9208 }
9209 else {
9210 /* 32 bit -- must be one of these */
9211 if (size == sizeof(user32_namespace_handler_info)) {
9212 goto sizeok;
9213 }
9214 if (size == sizeof(user32_namespace_handler_info_ext)) {
9215 goto sizeok;
9216 }
9217 if (size == sizeof(user32_namespace_handler_data)) {
9218 goto sizeok;
9219 }
9220 return EINVAL;
9221 }
9222
9223 sizeok:
9224
9225 return 0;
9226
9227 }
9228
9229 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9230 {
9231 int error = 0;
9232 namespace_handler_data nhd;
9233
9234 bzero (&nhd, sizeof(namespace_handler_data));
9235
9236 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9237 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9238 return EINVAL;
9239 }
9240
9241 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9242 return error;
9243 }
9244
9245 error = validate_namespace_args (is64bit, size);
9246 if (error) {
9247 return error;
9248 }
9249
9250 /* Copy in the userland pointers into our kernel-only struct */
9251
9252 if (is64bit) {
9253 /* 64 bit userland structures */
9254 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9255 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9256 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9257
9258 /* If the size is greater than the standard info struct, add in extra fields */
9259 if (size > (sizeof(user64_namespace_handler_info))) {
9260 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9261 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9262 }
9263 if (size == (sizeof(user64_namespace_handler_data))) {
9264 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9265 }
9266 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9267 }
9268 }
9269 else {
9270 /* 32 bit userland structures */
9271 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
9272 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
9273 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
9274
9275 if (size > (sizeof(user32_namespace_handler_info))) {
9276 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
9277 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
9278 }
9279 if (size == (sizeof(user32_namespace_handler_data))) {
9280 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
9281 }
9282 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9283 }
9284 }
9285
9286 return wait_for_namespace_event(&nhd, nspace_type);
9287 }
9288
9289 /*
9290 * Make a filesystem-specific control call:
9291 */
9292 /* ARGSUSED */
/*
 * fsctl_internal: shared backend for the fsctl() and ffsctl() system
 * calls.  Marshals the ioctl-style argument between user and kernel
 * space according to the IOC_* direction bits encoded in 'cmd',
 * services the generic FSCTL_* commands inline, and forwards anything
 * else to the filesystem via VNOP_IOCTL().
 *
 * On return, *arg_vp may have been set to NULL if this routine
 * consumed the vnode's iocount itself (FSCTL_SYNC_VOLUME path);
 * callers must check for NULL before calling vnode_put().
 */
static int
fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
{
	int error=0;
	boolean_t is64bit;
	u_int size;
#define STK_PARAMS 128
	char stkbuf[STK_PARAMS];
	caddr_t data, memp;
	vnode_t vp = *arg_vp;

	/* Argument size is encoded in the ioctl command word. */
	size = IOCPARM_LEN(cmd);
	if (size > IOCPARM_MAX) return (EINVAL);

	is64bit = proc_is64bit(p);

	memp = NULL;

	/*
	 * ensure the buffer is large enough for underlying calls
	 */
#ifndef HFSIOC_GETPATH
	typedef char pn_t[MAXPATHLEN];
#define HFSIOC_GETPATH  _IOWR('h', 13, pn_t)
#endif

#ifndef HFS_GETPATH
#define HFS_GETPATH  IOCBASECMD(HFSIOC_GETPATH)
#endif
	if (IOCBASECMD(cmd) == HFS_GETPATH) {
		/* Round up to MAXPATHLEN regardless of user input */
		size = MAXPATHLEN;
	}


	/* Large arguments go to the heap; small ones use the stack buffer. */
	if (size > sizeof (stkbuf)) {
		if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
		data = memp;
	} else {
		data = &stkbuf[0];
	};

	if (cmd & IOC_IN) {
		if (size) {
			error = copyin(udata, data, size);
			if (error) {
				if (memp) {
					kfree (memp, size);
				}
				return error;
			}
		} else {
			/* Zero-size IOC_IN: the "argument" is the user pointer itself. */
			if (is64bit) {
				*(user_addr_t *)data = udata;
			}
			else {
				*(uint32_t *)data = (uint32_t)udata;
			}
		};
	} else if ((cmd & IOC_OUT) && size) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	} else if (cmd & IOC_VOID) {
		if (is64bit) {
			*(user_addr_t *)data = udata;
		}
		else {
			*(uint32_t *)data = (uint32_t)udata;
		}
	}

	/* Check to see if it's a generic command */
	switch (IOCBASECMD(cmd)) {

	case FSCTL_SYNC_VOLUME: {
		mount_t mp = vp->v_mount;
		int arg = *(uint32_t*)data;

		/* record vid of vp so we can drop it below. */
		uint32_t vvid = vp->v_id;

		/*
		 * Then grab mount_iterref so that we can release the vnode.
		 * Without this, a thread may call vnode_iterate_prepare then
		 * get into a deadlock because we've never released the root vp
		 */
		error = mount_iterref (mp, 0);
		if (error)  {
			break;
		}
		vnode_put(vp);

		/* issue the sync for this volume */
		(void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);

		/*
		 * Then release the mount_iterref once we're done syncing; it's not
		 * needed for the VNOP_IOCTL below
		 */
		mount_iterdrop(mp);

		if (arg & FSCTL_SYNC_FULLSYNC) {
			/* re-obtain vnode iocount on the root vp, if possible */
			error = vnode_getwithvid (vp, vvid);
			if (error == 0) {
				error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
				vnode_put (vp);
			}
		}
		/* mark the argument VP as having been released */
		*arg_vp = NULL;
	}
	break;

	case FSCTL_SET_PACKAGE_EXTS: {
		user_addr_t ext_strings;
		uint32_t    num_entries;
		uint32_t    max_width;

		if (   (is64bit && size != sizeof(user64_package_ext_info))
		    || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {

			// either you're 64-bit and passed a 64-bit struct or
			// you're 32-bit and passed a 32-bit struct.  otherwise
			// it's not ok.
			error = EINVAL;
			break;
		}

		if (is64bit) {
			ext_strings = ((user64_package_ext_info *)data)->strings;
			num_entries = ((user64_package_ext_info *)data)->num_entries;
			max_width   = ((user64_package_ext_info *)data)->max_width;
		} else {
			ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
			num_entries = ((user32_package_ext_info *)data)->num_entries;
			max_width   = ((user32_package_ext_info *)data)->max_width;
		}
		error = set_package_extensions_table(ext_strings, num_entries, max_width);
	}
	break;

	/* namespace handlers */
	case FSCTL_NAMESPACE_HANDLER_GET: {
		error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
	}
	break;

	/* Snapshot handlers */
	case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
	}
	break;

	case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_UPDATE: {
		uint32_t token, val;
		int i;

		/* Restricted to the superuser acting as the handler process. */
		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			break;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			break;
		}

		/* Argument block is two words: item token, then a value. */
		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;  /* exit for loop, not case stmt */
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			error = ENOENT;
		} else {
			//
			// if this bit is set, when resolve_nspace_item() times out
			// it will loop and go back to sleep.
			//
			nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
		}

		lck_mtx_unlock(&nspace_handler_lock);

		if (error) {
			printf("nspace-handler-update: did not find token %u\n", token);
		}
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
		uint32_t token, val;
		int i;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			break;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			break;
		}

		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;  /* exit for loop, not case statement */
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			printf("nspace-handler-unblock: did not find token %u\n", token);
			error = ENOENT;
		} else {
			/* val == 0: also clear the pending-snapshot flag on the vnode. */
			if (val == 0 && nspace_items[i].vp) {
				vnode_lock_spin(nspace_items[i].vp);
				nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
				vnode_unlock(nspace_items[i].vp);
			}

			/* Mark the item done and wake the thread sleeping on it. */
			nspace_items[i].vp = NULL;
			nspace_items[i].arg = NULL;
			nspace_items[i].op = 0;
			nspace_items[i].vid = 0;
			nspace_items[i].flags = NSPACE_ITEM_DONE;
			nspace_items[i].token = 0;

			wakeup((caddr_t)&(nspace_items[i].vp));
		}

		lck_mtx_unlock(&nspace_handler_lock);
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_CANCEL: {
		uint32_t token, val;
		int i;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			break;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			break;
		}

		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;  /* exit for loop, not case stmt */
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			printf("nspace-handler-cancel: did not find token %u\n", token);
			error = ENOENT;
		} else {
			if (nspace_items[i].vp) {
				vnode_lock_spin(nspace_items[i].vp);
				nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
				vnode_unlock(nspace_items[i].vp);
			}

			/* Cancelled items keep 'val' as their token and get flagged. */
			nspace_items[i].vp = NULL;
			nspace_items[i].arg = NULL;
			nspace_items[i].vid = 0;
			nspace_items[i].token = val;
			nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
			nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;

			wakeup((caddr_t)&(nspace_items[i].vp));
		}

		lck_mtx_unlock(&nspace_handler_lock);
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			break;
		}

		// we explicitly do not do the namespace_handler_proc check here

		lck_mtx_lock(&nspace_handler_lock);
		snapshot_timestamp = ((uint32_t *)data)[0];
		/* Wake any handler blocked waiting for a valid timestamp. */
		wakeup(&nspace_item_idx);
		lck_mtx_unlock(&nspace_handler_lock);
		printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);

	}
	break;

	case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
	{
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			break;
		}

		lck_mtx_lock(&nspace_handler_lock);
		nspace_allow_virtual_devs = ((uint32_t *)data)[0];
		lck_mtx_unlock(&nspace_handler_lock);
		printf("nspace-snapshot-handler will%s allow events on disk-images\n",
		       nspace_allow_virtual_devs ? "" : " NOT");
		error = 0;

	}
	break;

	case FSCTL_SET_FSTYPENAME_OVERRIDE:
	{
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			break;
		}
		if (vp->v_mount) {
			mount_lock(vp->v_mount);
			if (data[0] != 0) {
				/* Non-empty string installs the override name. */
				strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
				vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
				/* Read-only "mtmfs" mounts get extended-security flags. */
				if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
					vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
					vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
				}
			} else {
				/* Empty string clears any existing override. */
				if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
					vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
				}
				vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
				vp->v_mount->fstypename_override[0] = '\0';
			}
			mount_unlock(vp->v_mount);
		}
	}
	break;

	default: {
		/* Invoke the filesystem-specific code */
		error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
	}

	} /* end switch stmt */

	/*
	 * if no errors, copy any data to user. Size was
	 * already set and checked above.
	 */
	if (error == 0 && (cmd & IOC_OUT) && size)
		error = copyout(data, udata, size);

	if (memp) {
		kfree(memp, size);
	}

	return error;
}
9671
9672 /* ARGSUSED */
9673 int
9674 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
9675 {
9676 int error;
9677 struct nameidata nd;
9678 u_long nameiflags;
9679 vnode_t vp = NULL;
9680 vfs_context_t ctx = vfs_context_current();
9681
9682 AUDIT_ARG(cmd, uap->cmd);
9683 AUDIT_ARG(value32, uap->options);
9684 /* Get the vnode for the file we are getting info on: */
9685 nameiflags = 0;
9686 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
9687 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
9688 UIO_USERSPACE, uap->path, ctx);
9689 if ((error = namei(&nd))) goto done;
9690 vp = nd.ni_vp;
9691 nameidone(&nd);
9692
9693 #if CONFIG_MACF
9694 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
9695 if (error) {
9696 goto done;
9697 }
9698 #endif
9699
9700 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9701
9702 done:
9703 if (vp)
9704 vnode_put(vp);
9705 return error;
9706 }
9707 /* ARGSUSED */
9708 int
9709 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
9710 {
9711 int error;
9712 vnode_t vp = NULL;
9713 vfs_context_t ctx = vfs_context_current();
9714 int fd = -1;
9715
9716 AUDIT_ARG(fd, uap->fd);
9717 AUDIT_ARG(cmd, uap->cmd);
9718 AUDIT_ARG(value32, uap->options);
9719
9720 /* Get the vnode for the file we are getting info on: */
9721 if ((error = file_vnode(uap->fd, &vp)))
9722 goto done;
9723 fd = uap->fd;
9724 if ((error = vnode_getwithref(vp))) {
9725 goto done;
9726 }
9727
9728 #if CONFIG_MACF
9729 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
9730 if (error) {
9731 goto done;
9732 }
9733 #endif
9734
9735 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9736
9737 done:
9738 if (fd != -1)
9739 file_drop(fd);
9740
9741 if (vp)
9742 vnode_put(vp);
9743 return error;
9744 }
9745 /* end of fsctl system call */
9746
9747 /*
9748 * Retrieve the data of an extended attribute.
9749 */
9750 int
9751 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
9752 {
9753 vnode_t vp;
9754 struct nameidata nd;
9755 char attrname[XATTR_MAXNAMELEN+1];
9756 vfs_context_t ctx = vfs_context_current();
9757 uio_t auio = NULL;
9758 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9759 size_t attrsize = 0;
9760 size_t namelen;
9761 u_int32_t nameiflags;
9762 int error;
9763 char uio_buf[ UIO_SIZEOF(1) ];
9764
9765 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9766 return (EINVAL);
9767
9768 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9769 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
9770 if ((error = namei(&nd))) {
9771 return (error);
9772 }
9773 vp = nd.ni_vp;
9774 nameidone(&nd);
9775
9776 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9777 goto out;
9778 }
9779 if (xattr_protected(attrname)) {
9780 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
9781 error = EPERM;
9782 goto out;
9783 }
9784 }
9785 /*
9786 * the specific check for 0xffffffff is a hack to preserve
9787 * binaray compatibilty in K64 with applications that discovered
9788 * that passing in a buf pointer and a size of -1 resulted in
9789 * just the size of the indicated extended attribute being returned.
9790 * this isn't part of the documented behavior, but because of the
9791 * original implemtation's check for "uap->size > 0", this behavior
9792 * was allowed. In K32 that check turned into a signed comparison
9793 * even though uap->size is unsigned... in K64, we blow by that
9794 * check because uap->size is unsigned and doesn't get sign smeared
9795 * in the munger for a 32 bit user app. we also need to add a
9796 * check to limit the maximum size of the buffer being passed in...
9797 * unfortunately, the underlying fileystems seem to just malloc
9798 * the requested size even if the actual extended attribute is tiny.
9799 * because that malloc is for kernel wired memory, we have to put a
9800 * sane limit on it.
9801 *
9802 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9803 * U64 running on K64 will yield -1 (64 bits wide)
9804 * U32/U64 running on K32 will yield -1 (32 bits wide)
9805 */
9806 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
9807 goto no_uio;
9808
9809 if (uap->value) {
9810 if (uap->size > (size_t)XATTR_MAXSIZE)
9811 uap->size = XATTR_MAXSIZE;
9812
9813 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9814 &uio_buf[0], sizeof(uio_buf));
9815 uio_addiov(auio, uap->value, uap->size);
9816 }
9817 no_uio:
9818 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
9819 out:
9820 vnode_put(vp);
9821
9822 if (auio) {
9823 *retval = uap->size - uio_resid(auio);
9824 } else {
9825 *retval = (user_ssize_t)attrsize;
9826 }
9827
9828 return (error);
9829 }
9830
9831 /*
9832 * Retrieve the data of an extended attribute.
9833 */
9834 int
9835 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
9836 {
9837 vnode_t vp;
9838 char attrname[XATTR_MAXNAMELEN+1];
9839 uio_t auio = NULL;
9840 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9841 size_t attrsize = 0;
9842 size_t namelen;
9843 int error;
9844 char uio_buf[ UIO_SIZEOF(1) ];
9845
9846 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9847 return (EINVAL);
9848
9849 if ( (error = file_vnode(uap->fd, &vp)) ) {
9850 return (error);
9851 }
9852 if ( (error = vnode_getwithref(vp)) ) {
9853 file_drop(uap->fd);
9854 return(error);
9855 }
9856 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9857 goto out;
9858 }
9859 if (xattr_protected(attrname)) {
9860 error = EPERM;
9861 goto out;
9862 }
9863 if (uap->value && uap->size > 0) {
9864 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9865 &uio_buf[0], sizeof(uio_buf));
9866 uio_addiov(auio, uap->value, uap->size);
9867 }
9868
9869 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
9870 out:
9871 (void)vnode_put(vp);
9872 file_drop(uap->fd);
9873
9874 if (auio) {
9875 *retval = uap->size - uio_resid(auio);
9876 } else {
9877 *retval = (user_ssize_t)attrsize;
9878 }
9879 return (error);
9880 }
9881
9882 /*
9883 * Set the data of an extended attribute.
9884 */
9885 int
9886 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
9887 {
9888 vnode_t vp;
9889 struct nameidata nd;
9890 char attrname[XATTR_MAXNAMELEN+1];
9891 vfs_context_t ctx = vfs_context_current();
9892 uio_t auio = NULL;
9893 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9894 size_t namelen;
9895 u_int32_t nameiflags;
9896 int error;
9897 char uio_buf[ UIO_SIZEOF(1) ];
9898
9899 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9900 return (EINVAL);
9901
9902 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9903 if (error == EPERM) {
9904 /* if the string won't fit in attrname, copyinstr emits EPERM */
9905 return (ENAMETOOLONG);
9906 }
9907 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9908 return error;
9909 }
9910 if (xattr_protected(attrname))
9911 return(EPERM);
9912 if (uap->size != 0 && uap->value == 0) {
9913 return (EINVAL);
9914 }
9915
9916 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9917 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
9918 if ((error = namei(&nd))) {
9919 return (error);
9920 }
9921 vp = nd.ni_vp;
9922 nameidone(&nd);
9923
9924 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9925 &uio_buf[0], sizeof(uio_buf));
9926 uio_addiov(auio, uap->value, uap->size);
9927
9928 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
9929 #if CONFIG_FSE
9930 if (error == 0) {
9931 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9932 FSE_ARG_VNODE, vp,
9933 FSE_ARG_DONE);
9934 }
9935 #endif
9936 vnode_put(vp);
9937 *retval = 0;
9938 return (error);
9939 }
9940
9941 /*
9942 * Set the data of an extended attribute.
9943 */
9944 int
9945 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
9946 {
9947 vnode_t vp;
9948 char attrname[XATTR_MAXNAMELEN+1];
9949 uio_t auio = NULL;
9950 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9951 size_t namelen;
9952 int error;
9953 char uio_buf[ UIO_SIZEOF(1) ];
9954 #if CONFIG_FSE
9955 vfs_context_t ctx = vfs_context_current();
9956 #endif
9957
9958 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9959 return (EINVAL);
9960
9961 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9962 return (error);
9963 }
9964 if (xattr_protected(attrname))
9965 return(EPERM);
9966 if (uap->size != 0 && uap->value == 0) {
9967 return (EINVAL);
9968 }
9969 if ( (error = file_vnode(uap->fd, &vp)) ) {
9970 return (error);
9971 }
9972 if ( (error = vnode_getwithref(vp)) ) {
9973 file_drop(uap->fd);
9974 return(error);
9975 }
9976 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9977 &uio_buf[0], sizeof(uio_buf));
9978 uio_addiov(auio, uap->value, uap->size);
9979
9980 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9981 #if CONFIG_FSE
9982 if (error == 0) {
9983 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9984 FSE_ARG_VNODE, vp,
9985 FSE_ARG_DONE);
9986 }
9987 #endif
9988 vnode_put(vp);
9989 file_drop(uap->fd);
9990 *retval = 0;
9991 return (error);
9992 }
9993
9994 /*
9995 * Remove an extended attribute.
9996 * XXX Code duplication here.
9997 */
9998 int
9999 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
10000 {
10001 vnode_t vp;
10002 struct nameidata nd;
10003 char attrname[XATTR_MAXNAMELEN+1];
10004 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10005 vfs_context_t ctx = vfs_context_current();
10006 size_t namelen;
10007 u_int32_t nameiflags;
10008 int error;
10009
10010 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10011 return (EINVAL);
10012
10013 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10014 if (error != 0) {
10015 return (error);
10016 }
10017 if (xattr_protected(attrname))
10018 return(EPERM);
10019 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10020 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
10021 if ((error = namei(&nd))) {
10022 return (error);
10023 }
10024 vp = nd.ni_vp;
10025 nameidone(&nd);
10026
10027 error = vn_removexattr(vp, attrname, uap->options, ctx);
10028 #if CONFIG_FSE
10029 if (error == 0) {
10030 add_fsevent(FSE_XATTR_REMOVED, ctx,
10031 FSE_ARG_VNODE, vp,
10032 FSE_ARG_DONE);
10033 }
10034 #endif
10035 vnode_put(vp);
10036 *retval = 0;
10037 return (error);
10038 }
10039
10040 /*
10041 * Remove an extended attribute.
10042 * XXX Code duplication here.
10043 */
10044 int
10045 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
10046 {
10047 vnode_t vp;
10048 char attrname[XATTR_MAXNAMELEN+1];
10049 size_t namelen;
10050 int error;
10051 #if CONFIG_FSE
10052 vfs_context_t ctx = vfs_context_current();
10053 #endif
10054
10055 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10056 return (EINVAL);
10057
10058 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10059 if (error != 0) {
10060 return (error);
10061 }
10062 if (xattr_protected(attrname))
10063 return(EPERM);
10064 if ( (error = file_vnode(uap->fd, &vp)) ) {
10065 return (error);
10066 }
10067 if ( (error = vnode_getwithref(vp)) ) {
10068 file_drop(uap->fd);
10069 return(error);
10070 }
10071
10072 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10073 #if CONFIG_FSE
10074 if (error == 0) {
10075 add_fsevent(FSE_XATTR_REMOVED, ctx,
10076 FSE_ARG_VNODE, vp,
10077 FSE_ARG_DONE);
10078 }
10079 #endif
10080 vnode_put(vp);
10081 file_drop(uap->fd);
10082 *retval = 0;
10083 return (error);
10084 }
10085
10086 /*
10087 * Retrieve the list of extended attribute names.
10088 * XXX Code duplication here.
10089 */
10090 int
10091 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
10092 {
10093 vnode_t vp;
10094 struct nameidata nd;
10095 vfs_context_t ctx = vfs_context_current();
10096 uio_t auio = NULL;
10097 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10098 size_t attrsize = 0;
10099 u_int32_t nameiflags;
10100 int error;
10101 char uio_buf[ UIO_SIZEOF(1) ];
10102
10103 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10104 return (EINVAL);
10105
10106 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10107 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
10108 if ((error = namei(&nd))) {
10109 return (error);
10110 }
10111 vp = nd.ni_vp;
10112 nameidone(&nd);
10113 if (uap->namebuf != 0 && uap->bufsize > 0) {
10114 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10115 &uio_buf[0], sizeof(uio_buf));
10116 uio_addiov(auio, uap->namebuf, uap->bufsize);
10117 }
10118
10119 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
10120
10121 vnode_put(vp);
10122 if (auio) {
10123 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10124 } else {
10125 *retval = (user_ssize_t)attrsize;
10126 }
10127 return (error);
10128 }
10129
10130 /*
10131 * Retrieve the list of extended attribute names.
10132 * XXX Code duplication here.
10133 */
10134 int
10135 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
10136 {
10137 vnode_t vp;
10138 uio_t auio = NULL;
10139 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10140 size_t attrsize = 0;
10141 int error;
10142 char uio_buf[ UIO_SIZEOF(1) ];
10143
10144 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10145 return (EINVAL);
10146
10147 if ( (error = file_vnode(uap->fd, &vp)) ) {
10148 return (error);
10149 }
10150 if ( (error = vnode_getwithref(vp)) ) {
10151 file_drop(uap->fd);
10152 return(error);
10153 }
10154 if (uap->namebuf != 0 && uap->bufsize > 0) {
10155 auio = uio_createwithbuffer(1, 0, spacetype,
10156 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10157 uio_addiov(auio, uap->namebuf, uap->bufsize);
10158 }
10159
10160 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
10161
10162 vnode_put(vp);
10163 file_drop(uap->fd);
10164 if (auio) {
10165 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10166 } else {
10167 *retval = (user_ssize_t)attrsize;
10168 }
10169 return (error);
10170 }
10171
/*
 * fsgetpath_internal: resolve a (volfs id, object id) pair to an
 * absolute path.
 *
 * ctx      caller's VFS context (used for MAC and access checks)
 * volfs_id volfs/fsid value identifying the mounted filesystem
 * objid    file id within that filesystem (2 selects the volume root)
 * bufsize  size of 'buf'; must not exceed PAGE_SIZE
 * buf      kernel buffer that receives the NUL-terminated path
 * pathlen  out: length of the path placed in 'buf'
 *
 * On a union mount, an id not found in the upper filesystem is
 * retried in the covered (mounted-on) filesystem.
 */
static int fsgetpath_internal(
	vfs_context_t ctx, int volfs_id, uint64_t objid,
	vm_size_t bufsize, caddr_t buf, int *pathlen)
{
	int error;
	struct mount *mp = NULL;
	vnode_t vp;
	int length;
	int bpflags;

	if (bufsize > PAGE_SIZE) {
		return (EINVAL);
	}

	if (buf == NULL) {
		return (ENOMEM);
	}

	/* Look up the mount by volfs id; on success it is returned busied. */
	if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
		error = ENOTSUP;  /* unexpected failure */
		return ENOTSUP;
	}

unionget:
	/* objid 2 is the conventional file id of the volume root. */
	if (objid == 2) {
		error = VFS_ROOT(mp, &vp, ctx);
	} else {
		error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
	}

	if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
		/*
		 * If the fileid isn't found and we're in a union
		 * mount volume, then see if the fileid is in the
		 * mounted-on volume.
		 */
		struct mount *tmp = mp;
		mp = vnode_mount(tmp->mnt_vnodecovered);
		vfs_unbusy(tmp);
		/* Non-blocking busy; on contention we give up with ENOENT. */
		if (vfs_busy(mp, LK_NOWAIT) == 0)
			goto unionget;
	} else {
		vfs_unbusy(mp);
	}

	if (error) {
		return error;
	}

#if CONFIG_MACF
	error = mac_vnode_check_fsgetpath(ctx, vp);
	if (error) {
		vnode_put(vp);
		return error;
	}
#endif

	/* Obtain the absolute path to this vnode. */
	bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
	bpflags |= BUILDPATH_CHECK_MOVED;
	error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
	vnode_put(vp);

	if (error) {
		goto out;
	}

	AUDIT_ARG(text, buf);

	if (kdebug_enable) {
		/* Emit a VFS-lookup tracepoint carrying (a suffix of) the path. */
		long dbg_parms[NUMPARMS];
		int  dbg_namelen;

		dbg_namelen = (int)sizeof(dbg_parms);

		if (length < dbg_namelen) {
			memcpy((char *)dbg_parms, buf, length);
			memset((char *)dbg_parms + length, 0, dbg_namelen - length);

			dbg_namelen = length;
		} else {
			/* Path longer than the event payload: keep the tail end. */
			memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
		}

		/*
		 * NOTE(review): vp's iocount was dropped above; the pointer
		 * appears to be passed here only as an identifying token for
		 * the trace record — confirm kdebug does not dereference it.
		 */
		kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
	}

	*pathlen = (user_ssize_t)length; /* may be superseded by error */

out:
	return (error);
}
10264
10265 /*
10266 * Obtain the full pathname of a file system object by id.
10267 *
10268 * This is a private SPI used by the File Manager.
10269 */
10270 __private_extern__
10271 int
10272 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10273 {
10274 vfs_context_t ctx = vfs_context_current();
10275 fsid_t fsid;
10276 char *realpath;
10277 int length;
10278 int error;
10279
10280 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10281 return (error);
10282 }
10283 AUDIT_ARG(value32, fsid.val[0]);
10284 AUDIT_ARG(value64, uap->objid);
10285 /* Restrict output buffer size for now. */
10286
10287 if (uap->bufsize > PAGE_SIZE) {
10288 return (EINVAL);
10289 }
10290 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10291 if (realpath == NULL) {
10292 return (ENOMEM);
10293 }
10294
10295 error = fsgetpath_internal(
10296 ctx, fsid.val[0], uap->objid,
10297 uap->bufsize, realpath, &length);
10298
10299 if (error) {
10300 goto out;
10301 }
10302
10303 error = copyout((caddr_t)realpath, uap->buf, length);
10304
10305 *retval = (user_ssize_t)length; /* may be superseded by error */
10306 out:
10307 if (realpath) {
10308 FREE(realpath, M_TEMP);
10309 }
10310 return (error);
10311 }
10312
10313 /*
10314 * Common routine to handle various flavors of statfs data heading out
10315 * to user space.
10316 *
10317 * Returns: 0 Success
10318 * EFAULT
10319 */
static int
munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
    user_addr_t bufp, int *sizep, boolean_t is_64_bit,
    boolean_t partial_copy)
{
	int error;
	int my_size, copy_size;

	if (is_64_bit) {
		struct user64_statfs sfs;
		my_size = copy_size = sizeof(sfs);
		/* Zero first so unassigned/reserved fields copy out as zero. */
		bzero(&sfs, my_size);
		/* Only the externally visible mount flags are reported. */
		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;
		sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
		sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
		sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
		sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
		sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
		sfs.f_files = (user64_long_t)sfsp->f_files;
		sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		/* A mount may override the fs type name reported to user space. */
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		if (partial_copy) {
			/* Caller wants the copyout without the trailing reserved fields. */
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}
	else {
		struct user32_statfs sfs;

		my_size = copy_size = sizeof(sfs);
		bzero(&sfs, my_size);

		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;

		/*
		 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
		 * have to fudge the numbers here in that case.  We inflate the blocksize in order
		 * to reflect the filesystem size as best we can.
		 */
		if ((sfsp->f_blocks > INT_MAX)
			/* Hack for 4061702.  I think the real fix is for Carbon to
			 * look for some volume capability and not depend on hidden
			 * semantics agreed between a FS and Carbon.
			 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
			 * for Carbon to set the bNoVolumeSizes volume attribute.
			 * Without this the webdavfs files cannot be copied onto
			 * disk as they look huge.  This change should not affect
			 * XSAN as they should not be setting these to -1.
			 */
			&& (sfsp->f_blocks != 0xffffffffffffffffULL)
			&& (sfsp->f_bfree != 0xffffffffffffffffULL)
			&& (sfsp->f_bavail != 0xffffffffffffffffULL)) {
			int shift;

			/*
			 * Work out how far we have to shift the block count down to make it fit.
			 * Note that it's possible to have to shift so far that the resulting
			 * blocksize would be unreportably large.  At that point, we will clip
			 * any values that don't fit.
			 *
			 * For safety's sake, we also ensure that f_iosize is never reported as
			 * being smaller than f_bsize.
			 */
			for (shift = 0; shift < 32; shift++) {
				if ((sfsp->f_blocks >> shift) <= INT_MAX)
					break;
				/* Stop once inflating the blocksize further would exceed INT_MAX. */
				if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
					break;
			}
#define __SHIFT_OR_CLIP(x, s)	((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
			sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
			sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
			sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
#undef __SHIFT_OR_CLIP
			sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
			sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
		} else {
			/* filesystem is small enough to be reported honestly */
			sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
			sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
			sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
			sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
			sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
		}
		sfs.f_files = (user32_long_t)sfsp->f_files;
		sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		if (partial_copy) {
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}

	/* Report the full (untrimmed) structure size even for a partial copy. */
	if (sizep != NULL) {
		*sizep = my_size;
	}
	return(error);
}
10440
10441 /*
10442 * copy stat structure into user_stat structure.
10443 */
/*
 * Field-by-field copy of a kernel 'struct stat' into the user64 layout.
 * The destination is zeroed first so fields not explicitly assigned below
 * (and any padding) are copied out as zero rather than stale kernel data.
 */
void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
	/*
	 * The two time representations differ only in naming: timespec
	 * structs when _POSIX_C_SOURCE is not defined, split sec/nsec
	 * scalar fields otherwise.
	 */
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
10479
/*
 * Field-by-field copy of a kernel 'struct stat' into the user32 layout.
 * Destination is zeroed first so unassigned fields copy out as zero.
 *
 * NOTE(review): fields that are narrower in the user32 structure are
 * implicitly truncated by these assignments — presumably acceptable for
 * legacy 32-bit callers; verify against the user32_stat definition.
 */
void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
	/* Timestamp layout mirrors munge_user64_stat: timespec vs. split fields. */
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
10515
10516 /*
10517 * copy stat64 structure into user_stat64 structure.
10518 */
/*
 * Field-by-field copy of a kernel 'struct stat64' into the user64 layout.
 * Like munge_user64_stat, but the stat64 variants also carry the file
 * birth (creation) time.  Destination is zeroed first so unassigned
 * fields copy out as zero.
 */
void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
	/* Timestamps: timespec structs vs. split sec/nsec scalar fields. */
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
	usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
	usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
	usbp->st_birthtime = sbp->st_birthtime;
	usbp->st_birthtimensec = sbp->st_birthtimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
10558
/*
 * Field-by-field copy of a kernel 'struct stat64' into the user32 layout
 * (including the birth-time fields).  Destination is zeroed first so
 * unassigned fields copy out as zero.
 *
 * NOTE(review): fields that are narrower in the user32 structure are
 * implicitly truncated by these assignments — verify against the
 * user32_stat64 definition.
 */
void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
	/* Timestamps: timespec structs vs. split sec/nsec scalar fields. */
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
	usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
	usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
	usbp->st_birthtime = sbp->st_birthtime;
	usbp->st_birthtimensec = sbp->st_birthtimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
10598
10599 /*
10600 * Purge buffer cache for simulating cold starts
10601 */
10602 static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
10603 {
10604 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
10605
10606 return VNODE_RETURNED;
10607 }
10608
10609 static int vfs_purge_callback(mount_t mp, __unused void * arg)
10610 {
10611 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
10612
10613 return VFS_RETURNED;
10614 }
10615
10616 int
10617 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
10618 {
10619 if (!kauth_cred_issuser(kauth_cred_get()))
10620 return EPERM;
10621
10622 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
10623
10624 return 0;
10625 }
10626