bsd/vfs/vfs_syscalls.c

   1 /*
   2  * Copyright (c) 1995-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1989, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  * (c) UNIX System Laboratories, Inc.
  32  * All or some portions of this file are derived from material licensed
  33  * to the University of California by American Telephone and Telegraph
  34  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  35  * the permission of UNIX System Laboratories, Inc.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. All advertising materials mentioning features or use of this software
  46  *    must display the following acknowledgement:
  47  *      This product includes software developed by the University of
  48  *      California, Berkeley and its contributors.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  *
  65  *      @(#)vfs_syscalls.c      8.41 (Berkeley) 6/15/95
  66  */
  67 /*
  68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  69  * support for mandatory and extensible security protections.  This notice
  70  * is included in support of clause 2.2 (b) of the Apple Public License,
  71  * Version 2.0.
  72  */
  73
  74 #include <sys/param.h>
  75 #include <sys/systm.h>
  76 #include <sys/namei.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/kernel.h>
  79 #include <sys/file_internal.h>
  80 #include <sys/stat.h>
  81 #include <sys/vnode_internal.h>
  82 #include <sys/mount_internal.h>
  83 #include <sys/proc_internal.h>
  84 #include <sys/kauth.h>
  85 #include <sys/uio_internal.h>
  86 #include <sys/malloc.h>
  87 #include <sys/mman.h>
  88 #include <sys/dirent.h>
  89 #include <sys/attr.h>
  90 #include <sys/sysctl.h>
  91 #include <sys/ubc.h>
  92 #include <sys/quota.h>
  93 #include <sys/kdebug.h>
  94 #include <sys/fsevents.h>
  95 #include <sys/sysproto.h>
  96 #include <sys/xattr.h>
  97 #include <sys/fcntl.h>
  98 #include <sys/fsctl.h>
  99 #include <sys/ubc_internal.h>
 100 #include <sys/disk.h>
 101 #include <machine/cons.h>
 102 #include <machine/limits.h>
 103 #include <miscfs/specfs/specdev.h>
 104 #include <miscfs/union/union.h>
 105
 106 #include <security/audit/audit.h>
 107 #include <bsm/audit_kevents.h>
 108
 109 #include <mach/mach_types.h>
 110 #include <kern/kern_types.h>
 111 #include <kern/kalloc.h>
 112
 113 #include <vm/vm_pageout.h>
 114
 115 #include <libkern/OSAtomic.h>
 116 #include <pexpert/pexpert.h>
 117
 118 #if CONFIG_MACF
 119 #include <security/mac.h>
 120 #include <security/mac_framework.h>
 121 #endif
  122 /*
       * GET_PATH(x) / RELEASE_PATH(x): obtain and give back a path buffer.
       *
       * With CONFIG_FSE the buffer is assigned from get_pathbuff() and handed
       * back through release_pathbuff().  Otherwise a MAXPATHLEN-byte buffer
       * is allocated from, and freed to, the M_NAMEI zone; the allocation
       * passes M_WAITOK and casts the result to char *.
       *
       * x is assigned to, so it must be an lvalue.  Each macro expands to a
       * complete statement that already ends in ';', so "GET_PATH(p);" leaves
       * an extra empty statement behind: do not use these macros as the
       * unbraced body of an if that has an else.
       */
  123 #if CONFIG_FSE
  124 #define GET_PATH(x) \
  125         (x) = get_pathbuff();
  126 #define RELEASE_PATH(x) \
  127         release_pathbuff(x);
  128 #else
  129 #define GET_PATH(x)     \
  130         MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
  131 #define RELEASE_PATH(x) \
  132         FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
  133 #endif /* CONFIG_FSE */
 134
  135 /*
       * Argument bundle for the checkdirs iteration (see the
       * checkdirs_callback() prototype that follows).
       * NOTE(review): the code that fills and reads this structure is not
       * in view; the field roles below are inferred from the names only.
       */
  136 struct cdirargs {
  137         vnode_t olddp;  /* presumably the directory vnode being replaced - confirm */
  138         vnode_t newdp;  /* presumably the vnode that takes its place - confirm */
  139 };
  140 /* Callback for the checkdirs iteration; arg is an opaque pointer
       * (presumably a struct cdirargs - confirm against the definition). */
  141 static int checkdirs_callback(proc_t p, void * arg);
  142 /* Forward declarations of helpers; all are static except enablequotas(). */
  143 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
  144 static int checkdirs(vnode_t olddp, vfs_context_t ctx); /* __mac_mount() calls this on the covered vnode after a new mount */
  145 void enablequotas(struct mount *mp, vfs_context_t ctx); /* __mac_mount() calls this after a successful MNT_UPDATE */
  146 static int getfsstat_callback(mount_t mp, void * arg);
  147 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
  148 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
  149 static int sync_callback(mount_t, void *);
  150 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
  151                         user_addr_t bufp, int *sizep, boolean_t is_64_bit,
  152                                                 boolean_t partial_copy);
  153 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
  154                         user_addr_t bufp);
  155 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
  156 /*
       * Helpers declared only when CONFIG_IMGSRC_ACCESS is defined.
       * relocate_imageboot_source() is what __mac_mount() calls when the
       * request's flags are exactly MNT_IMGSRC.  The definitions of all of
       * these functions are outside this view.
       */
  157 #ifdef CONFIG_IMGSRC_ACCESS
  158 static int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname);
  159 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
  160 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
  161 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
  162 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
  163 static void mount_end_update(mount_t mp);
  164 static int relocate_imageboot_source(vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs);
  165 #endif /* CONFIG_IMGSRC_ACCESS */
  166 /*
       * Function-pointer hook, defined here without an initializer.
       * NOTE(review): the name suggests a union-mount directory check that
       * some other component installs - confirm where it is assigned.
       */
  167 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
  168 /* Non-static routines declared __private_extern__; definitions are not in this view. */
  169 __private_extern__
  170 int sync_internal(void);
  171
  172 __private_extern__
  173 int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *);
  174
  175 __private_extern__
  176 int unlink1(vfs_context_t, struct nameidata *, int);
 177
  178 /*
       * Argument structures and prototypes for the fstatv, lstatv, statv and
       * mkcomplex calls.  They are compiled only when __APPLE_API_OBSOLETE is
       * defined; the implementations are not in this view.
       */
  179 #ifdef __APPLE_API_OBSOLETE
  180 struct fstatv_args {
  181        int fd;                  /* file descriptor of the target file */
  182        struct vstat *vsb;       /* vstat structure for returned info  */
  183 };
  184 struct lstatv_args {
  185        const char *path;        /* pathname of the target file       */
  186        struct vstat *vsb;       /* vstat structure for returned info */
  187 };
  188 struct mkcomplex_args {
  189         const char *path;       /* pathname of the file to be created */
  190                 mode_t mode;            /* access mode for the newly created file */
  191         u_int32_t type;         /* format of the complex file */
  192 };
  193 struct statv_args {
  194         const char *path;       /* pathname of the target file       */
  195         struct vstat *vsb;      /* vstat structure for returned info */
  196 };
  197
  198 int fstatv(proc_t p, struct fstatv_args *uap, int32_t *retval);
  199 int lstatv(proc_t p, struct lstatv_args *uap, int32_t *retval);
  200 int mkcomplex(proc_t p, struct mkcomplex_args *uap, int32_t *retval);
  201 int statv(proc_t p, struct statv_args *uap, int32_t *retval);
  202
  203 #endif /* __APPLE_API_OBSOLETE */
 204
  205 /*
  206  * Incremented each time a mount or unmount operation occurs;
  207  * used to invalidate the cached value of the rootvp in the
  208  * mount structure utilized by cache_lookup_path.
       * __mac_mount() bumps it between name_cache_lock() and
       * name_cache_unlock() once a new mount has been attached to the
       * covered vnode.
  209  */
  210 uint32_t mount_generation = 0;
  211
  212 /* Counts the number of mount and unmount operations. */
  213 unsigned int vfs_nummntops=0;
  214 /* External symbols referenced by this file; their definitions are not in this view. */
  215 extern struct fileops vnops;
  216 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
 217
 218
 219 /*
 220  * Virtual File System System Calls
 221  */
 222
 223 /*
 224  * Mount a file system.
 225  */
 226 /* ARGSUSED */
 227 int
 228 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
 229 {
 230         struct __mac_mount_args muap;
 231
 232         muap.type = uap->type;
 233         muap.path = uap->path;
 234         muap.flags = uap->flags;
 235         muap.data = uap->data;
 236         muap.mac_p = USER_ADDR_NULL;
 237         return (__mac_mount(p, &muap, retval));
 238 }
 239
 240 /*
 241  * __mac_mount:
 242  *      Mount a file system taking into account MAC label behavior.
 243  *      See mount(2) man page for more information
 244  *
 245  * Parameters:    p                        Process requesting the mount
 246  *                uap                      User argument descriptor (see below)
 247  *                retval                   (ignored)
 248  *
 249  * Indirect:      uap->type                Filesystem type
 250  *                uap->path                Path to mount
 251  *                uap->data                Mount arguments
 252  *                uap->mac_p               MAC info
 253  *                uap->flags               Mount flags
 254  *
 255  *
 256  * Returns:        0                       Success
  257  *                !0                       Failure (errno value)
 258  */
 259 int
 260 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
 261 {
 262         struct vnode *vp, *pvp;
 263         struct vnode *devvp = NULLVP;
 264         struct vnode *device_vnode = NULLVP;
 265 #if CONFIG_MACF
 266         struct vnode *rvp;
 267 #endif
 268         struct mount *mp;
 269         struct vfstable *vfsp = (struct vfstable *)0;
 270         int error, flag = 0;
 271         struct vnode_attr va;
 272         vfs_context_t ctx = vfs_context_current();
 273         struct nameidata nd;
 274         struct nameidata nd1;
 275         char fstypename[MFSNAMELEN];
 276         size_t dummy=0;
 277         user_addr_t devpath = USER_ADDR_NULL;
 278         user_addr_t fsmountargs =  uap->data;
 279         int ronly = 0;
 280         int mntalloc = 0;
 281         boolean_t vfsp_ref = FALSE;
 282         mode_t accessmode;
 283         boolean_t is_64bit;
 284         boolean_t is_rwlock_locked = FALSE;
 285         boolean_t did_rele = FALSE;
 286         boolean_t have_usecount = FALSE;
 287
 288         AUDIT_ARG(fflags, uap->flags);
 289
 290         is_64bit = proc_is64bit(p);
 291
 292         /*
 293          * Get vnode to be covered
 294          */
 295         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT,
 296                    UIO_USERSPACE, uap->path, ctx);
 297         error = namei(&nd);
 298         if (error)
 299                 return (error);
 300         vp = nd.ni_vp;
 301         pvp = nd.ni_dvp;
 302
 303         if ((vp->v_flag & VROOT) &&
 304                 (vp->v_mount->mnt_flag & MNT_ROOTFS))
 305                         uap->flags |= MNT_UPDATE;
 306
 307         error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
 308         if (error)
 309                 goto out1;
 310
 311 #ifdef CONFIG_IMGSRC_ACCESS
 312         if (uap->flags == MNT_IMGSRC) {
 313                 error = relocate_imageboot_source(vp, &nd.ni_cnd, fstypename, ctx, is_64bit, fsmountargs);
 314                 vnode_put(pvp);
 315                 vnode_put(vp);
 316                 return error;
 317         }
 318 #endif /* CONFIG_IMGSRC_ACCESS */
 319
 320         if (uap->flags & MNT_UPDATE) {
 321                 if ((vp->v_flag & VROOT) == 0) {
 322                         error = EINVAL;
 323                         goto out1;
 324                 }
 325                 mp = vp->v_mount;
 326
 327                 /* unmount in progress return error */
 328                 mount_lock_spin(mp);
 329                 if (mp->mnt_lflag & MNT_LUNMOUNT) {
 330                         mount_unlock(mp);
 331                         error = EBUSY;
 332                         goto out1;
 333                 }
 334                 mount_unlock(mp);
 335                 lck_rw_lock_exclusive(&mp->mnt_rwlock);
 336                 is_rwlock_locked = TRUE;
 337                 /*
 338                  * We only allow the filesystem to be reloaded if it
 339                  * is currently mounted read-only.
 340                  */
 341                 if ((uap->flags & MNT_RELOAD) &&
 342                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
 343                         error = ENOTSUP;
 344                         goto out1;
 345                 }
 346
 347 #ifdef CONFIG_IMGSRC_ACCESS
 348                 /* Can't downgrade the backer of the root FS */
 349                 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
 350                         (!vfs_isrdonly(mp)) && (uap->flags & MNT_RDONLY))
 351                 {
 352                         error = ENOTSUP;
 353                         goto out1;
 354                 }
 355 #endif /* CONFIG_IMGSRC_ACCESS */
 356
 357                 /*
 358                  * Only root, or the user that did the original mount is
 359                  * permitted to update it.
 360                  */
 361                 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 362                     (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
 363                         goto out1;
 364                 }
 365 #if CONFIG_MACF
 366                 error = mac_mount_check_remount(ctx, mp);
 367                 if (error != 0) {
 368                         lck_rw_done(&mp->mnt_rwlock);
 369                         goto out1;
 370                 }
 371 #endif
 372                 /*
 373                  * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
 374                  * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
 375                  */
 376                 if (suser(vfs_context_ucred(ctx), NULL)) {
 377                         uap->flags |= MNT_NOSUID | MNT_NODEV;
 378                         if (mp->mnt_flag & MNT_NOEXEC)
 379                                 uap->flags |= MNT_NOEXEC;
 380                 }
 381                 flag = mp->mnt_flag;
 382
 383                 mp->mnt_flag |=
 384                     uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
 385
 386                 vfsp = mp->mnt_vtable;
 387                 goto update;
 388         }
 389         /*
 390          * If the user is not root, ensure that they own the directory
 391          * onto which we are attempting to mount.
 392          */
 393         VATTR_INIT(&va);
 394         VATTR_WANTED(&va, va_uid);
 395         if ((error = vnode_getattr(vp, &va, ctx)) ||
 396             (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 397              (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))) {
 398                 goto out1;
 399         }
 400         /*
 401          * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
 402          * MNT_NOEXEC if mount point is already MNT_NOEXEC.
 403          */
 404         if (suser(vfs_context_ucred(ctx), NULL)) {
 405                 uap->flags |= MNT_NOSUID | MNT_NODEV;
 406                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
 407                         uap->flags |= MNT_NOEXEC;
 408         }
 409         if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
 410                 goto out1;
 411
 412         if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
 413                 goto out1;
 414
 415         if (vp->v_type != VDIR) {
 416                 error = ENOTDIR;
 417                 goto out1;
 418         }
 419
 420         /* XXXAUDIT: Should we capture the type on the error path as well? */
 421         AUDIT_ARG(text, fstypename);
 422         mount_list_lock();
 423         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 424                 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
 425                         vfsp->vfc_refcount++;
 426                         vfsp_ref = TRUE;
 427                         break;
 428                 }
 429         mount_list_unlock();
 430         if (vfsp == NULL) {
 431                 error = ENODEV;
 432                 goto out1;
 433         }
 434 #if CONFIG_MACF
 435         error = mac_mount_check_mount(ctx, vp,
 436             &nd.ni_cnd, vfsp->vfc_name);
 437         if (error != 0)
 438                 goto out1;
 439 #endif
 440         if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
 441                 error = EBUSY;
 442                 goto out1;
 443         }
 444         vnode_lock_spin(vp);
 445         SET(vp->v_flag, VMOUNT);
 446         vnode_unlock(vp);
 447
 448         /*
 449          * Allocate and initialize the filesystem.
 450          */
 451         MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
 452                 M_MOUNT, M_WAITOK);
 453         bzero((char *)mp, (u_int32_t)sizeof(struct mount));
 454         mntalloc = 1;
 455
 456         /* Initialize the default IO constraints */
 457         mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 458         mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 459         mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
 460         mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
 461         mp->mnt_devblocksize = DEV_BSIZE;
 462         mp->mnt_alignmentmask = PAGE_MASK;
 463         mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
 464         mp->mnt_ioscale = 1;
 465         mp->mnt_ioflags = 0;
 466         mp->mnt_realrootvp = NULLVP;
 467         mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
 468
 469         TAILQ_INIT(&mp->mnt_vnodelist);
 470         TAILQ_INIT(&mp->mnt_workerqueue);
 471         TAILQ_INIT(&mp->mnt_newvnodes);
 472         mount_lock_init(mp);
 473         lck_rw_lock_exclusive(&mp->mnt_rwlock);
 474         is_rwlock_locked = TRUE;
 475         mp->mnt_op = vfsp->vfc_vfsops;
 476         mp->mnt_vtable = vfsp;
 477         //mp->mnt_stat.f_type = vfsp->vfc_typenum;
 478         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 479         strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
 480         strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
 481         mp->mnt_vnodecovered = vp;
 482         mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
 483         mp->mnt_devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
 484
 485         /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
 486         vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
 487
 488 update:
 489         /*
 490          * Set the mount level flags.
 491          */
 492         if (uap->flags & MNT_RDONLY)
 493                 mp->mnt_flag |= MNT_RDONLY;
 494         else if (mp->mnt_flag & MNT_RDONLY)
 495                 mp->mnt_kern_flag |= MNTK_WANTRDWR;
 496         mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 497                           MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 498                           MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 499                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
 500         mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 501                                       MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 502                                       MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 503                                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
 504
 505 #if CONFIG_MACF
 506         if (uap->flags & MNT_MULTILABEL) {
 507                 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
 508                         error = EINVAL;
 509                         goto out1;
 510                 }
 511                 mp->mnt_flag |= MNT_MULTILABEL;
 512         }
 513 #endif
 514
 515         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
 516                 if (is_64bit) {
 517                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
 518                                 goto out1;
 519                         fsmountargs += sizeof(devpath);
 520                 } else {
 521                         user32_addr_t tmp;
 522                         if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
 523                                 goto out1;
 524                         /* munge into LP64 addr */
 525                         devpath = CAST_USER_ADDR_T(tmp);
 526                         fsmountargs += sizeof(tmp);
 527                 }
 528
 529                 /* if it is not update and device name needs to be parsed */
 530                 if ((devpath)) {
 531                         NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
 532                         if ( (error = namei(&nd1)) )
 533                                 goto out1;
 534
 535                         strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN);
 536                         devvp = nd1.ni_vp;
 537
 538                         nameidone(&nd1);
 539
 540                         if (devvp->v_type != VBLK) {
 541                                 error = ENOTBLK;
 542                                 goto out2;
 543                         }
 544                         if (major(devvp->v_rdev) >= nblkdev) {
 545                                 error = ENXIO;
 546                                 goto out2;
 547                         }
 548                         /*
 549                         * If mount by non-root, then verify that user has necessary
 550                         * permissions on the device.
 551                         */
 552                         if (suser(vfs_context_ucred(ctx), NULL) != 0) {
 553                                 accessmode = KAUTH_VNODE_READ_DATA;
 554                                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 555                                         accessmode |= KAUTH_VNODE_WRITE_DATA;
 556                                 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
 557                                         goto out2;
 558                         }
 559                 }
 560                 if (devpath && ((uap->flags & MNT_UPDATE) == 0)) {
 561                         if ( (error = vnode_ref(devvp)) )
 562                                 goto out2;
 563                         /*
 564                         * Disallow multiple mounts of the same device.
 565                         * Disallow mounting of a device that is currently in use
 566                         * (except for root, which might share swap device for miniroot).
 567                         * Flush out any old buffers remaining from a previous use.
 568                         */
 569                         if ( (error = vfs_mountedon(devvp)) )
 570                                 goto out3;
 571
 572                         if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
 573                                 error = EBUSY;
 574                                 goto out3;
 575                         }
 576                         if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
 577                                 error = ENOTBLK;
 578                                 goto out3;
 579                         }
 580                         if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
 581                                 goto out3;
 582
 583                         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 584 #if CONFIG_MACF
 585                         error = mac_vnode_check_open(ctx,
 586                             devvp,
 587                             ronly ? FREAD : FREAD|FWRITE);
 588                         if (error)
 589                                 goto out3;
 590 #endif /* MAC */
 591                         if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
 592                                 goto out3;
 593
 594                         mp->mnt_devvp = devvp;
 595                         device_vnode = devvp;
 596                 } else {
 597                         if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
 598                                 dev_t dev;
 599                                 int maj;
 600                                 /*
 601                                  * If upgrade to read-write by non-root, then verify
 602                                  * that user has necessary permissions on the device.
 603                                  */
 604                                 device_vnode = mp->mnt_devvp;
 605
 606                                 if (device_vnode) {
 607                                         vnode_getalways(device_vnode);
 608
 609                                         if (suser(vfs_context_ucred(ctx), NULL)) {
 610                                                 if ((error = vnode_authorize(device_vnode, NULL,
 611                                                                                 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) {
 612                                                         vnode_put(device_vnode);
 613                                                         goto out2;
 614                                                 }
 615                                         }
 616
 617                                         /* Tell the device that we're upgrading */
 618                                         dev = (dev_t)device_vnode->v_rdev;
 619                                         maj = major(dev);
 620
 621                                         if ((u_int)maj >= (u_int)nblkdev)
 622                                                 panic("Volume mounted on a device with invalid major number.\n");
 623
 624                                         error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
 625
 626                                         vnode_put(device_vnode);
 627                                         if (error != 0) {
 628                                                 goto out2;
 629                                         }
 630                                 }
 631                         }
 632                         device_vnode = NULLVP;
 633                 }
 634         }
 635 #if CONFIG_MACF
 636         if ((uap->flags & MNT_UPDATE) == 0) {
 637                 mac_mount_label_init(mp);
 638                 mac_mount_label_associate(ctx, mp);
 639         }
 640         if (uap->mac_p != USER_ADDR_NULL) {
 641                 struct user_mac mac;
 642                 char *labelstr = NULL;
 643                 size_t ulen = 0;
 644
 645                 if ((uap->flags & MNT_UPDATE) != 0) {
 646                         error = mac_mount_check_label_update(
 647                             ctx, mp);
 648                         if (error != 0)
 649                                 goto out3;
 650                 }
 651                 if (is_64bit) {
 652                         error = copyin(uap->mac_p, &mac, sizeof(mac));
 653                 } else {
 654                         struct mac mac32;
 655                         error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 656                         mac.m_buflen = mac32.m_buflen;
 657                         mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
 658                 }
 659                 if (error != 0)
 660                         goto out3;
 661                 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
 662                     (mac.m_buflen < 2)) {
 663                         error = EINVAL;
 664                         goto out3;
 665                 }
 666                 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
 667                 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
 668                 if (error != 0) {
 669                         FREE(labelstr, M_MACTEMP);
 670                         goto out3;
 671                 }
 672                 AUDIT_ARG(mac_string, labelstr);
 673                 error = mac_mount_label_internalize(mp->mnt_mntlabel, labelstr);
 674                 FREE(labelstr, M_MACTEMP);
 675                 if (error != 0)
 676                         goto out3;
 677         }
 678 #endif
 679         if (device_vnode != NULL) {
 680                 VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&mp->mnt_devbsdunit, 0, NULL);
 681                 mp->mnt_devbsdunit %= LOWPRI_MAX_NUM_DEV;
 682         }
 683
 684         /*
 685          * Mount the filesystem.
 686          */
 687         error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
 688
 689         if (uap->flags & MNT_UPDATE) {
 690                 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 691                         mp->mnt_flag &= ~MNT_RDONLY;
 692                 mp->mnt_flag &=~
 693                     (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
 694                 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 695                 if (error)
 696                         mp->mnt_flag = flag;
 697                 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
 698                 lck_rw_done(&mp->mnt_rwlock);
 699                 is_rwlock_locked = FALSE;
 700                 if (!error)
 701                         enablequotas(mp, ctx);
 702                 goto out2;
 703         }
 704         /*
 705          * Put the new filesystem on the mount list after root.
 706          */
 707         if (error == 0) {
 708                 struct vfs_attr vfsattr;
 709 #if CONFIG_MACF
 710                 if (vfs_flags(mp) & MNT_MULTILABEL) {
 711                         error = VFS_ROOT(mp, &rvp, ctx);
 712                         if (error) {
 713                                 printf("%s() VFS_ROOT returned %d\n", __func__, error);
 714                                 goto out3;
 715                         }
 716                         error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
 717                         /*
 718                          * drop reference provided by VFS_ROOT
 719                          */
 720                         vnode_put(rvp);
 721
 722                         if (error)
 723                                 goto out3;
 724                 }
 725 #endif  /* MAC */
 726
 727                 vnode_lock_spin(vp);
 728                 CLR(vp->v_flag, VMOUNT);
 729                 vp->v_mountedhere = mp;
 730                 vnode_unlock(vp);
 731
 732                 /*
 733                  * taking the name_cache_lock exclusively will
 734                  * ensure that everyone is out of the fast path who
 735                  * might be trying to use a now stale copy of
 736                  * vp->v_mountedhere->mnt_realrootvp
 737                  * bumping mount_generation causes the cached values
 738                  * to be invalidated
 739                  */
 740                 name_cache_lock();
 741                 mount_generation++;
 742                 name_cache_unlock();
 743
 744                 error = vnode_ref(vp);
 745                 if (error != 0) {
 746                         goto out4;
 747                 }
 748
 749                 have_usecount = TRUE;
 750
 751                 error = checkdirs(vp, ctx);
 752                 if (error != 0)  {
 753                         /* Unmount the filesystem as cdir/rdirs cannot be updated */
 754                         goto out4;
 755                 }
 756                 /*
 757                  * there is no cleanup code here so I have made it void
 758                  * we need to revisit this
 759                  */
 760                 (void)VFS_START(mp, 0, ctx);
 761
 762                 error = mount_list_add(mp);
 763                 if (error != 0) {
 764                         goto out4;
 765                 }
 766
 767                 lck_rw_done(&mp->mnt_rwlock);
 768                 is_rwlock_locked = FALSE;
 769
 770                 /* Check if this mounted file system supports EAs or named streams. */
 771                 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
 772                 VFSATTR_INIT(&vfsattr);
 773                 VFSATTR_WANTED(&vfsattr, f_capabilities);
 774                 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
 775                     vfs_getattr(mp, &vfsattr, ctx) == 0 &&
 776                     VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
 777                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
 778                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
 779                                 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 780                         }
 781 #if NAMEDSTREAMS
 782                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
 783                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
 784                                 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
 785                         }
 786 #endif
 787                         /* Check if this file system supports path from id lookups. */
 788                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
 789                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
 790                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 791                         } else if (mp->mnt_flag & MNT_DOVOLFS) {
 792                                 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
 793                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 794                         }
 795                 }
 796                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
 797                         mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 798                 }
 799                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
 800                         mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
 801                 }
 802                 /* increment the operations count */
 803                 OSAddAtomic(1, &vfs_nummntops);
 804                 enablequotas(mp, ctx);
 805
 806                 if (device_vnode) {
 807                         device_vnode->v_specflags |= SI_MOUNTEDON;
 808
 809                         /*
 810                          *   cache the IO attributes for the underlying physical media...
 811                          *   an error return indicates the underlying driver doesn't
 812                          *   support all the queries necessary... however, reasonable
 813                          *   defaults will have been set, so no reason to bail or care
 814                          */
 815                         vfs_init_io_attributes(device_vnode, mp);
 816                 }
 817
 818                 /* Now that mount is setup, notify the listeners */
 819                 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
 820         } else {
 821                 vnode_lock_spin(vp);
 822                 CLR(vp->v_flag, VMOUNT);
 823                 vnode_unlock(vp);
 824                 mount_list_lock();
 825                 mp->mnt_vtable->vfc_refcount--;
 826                 mount_list_unlock();
 827
 828                 if (device_vnode ) {
 829                         vnode_rele(device_vnode);
 830                         VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
 831                 }
 832                 lck_rw_done(&mp->mnt_rwlock);
 833                 is_rwlock_locked = FALSE;
 834                 mount_lock_destroy(mp);
 835 #if CONFIG_MACF
 836                 mac_mount_label_destroy(mp);
 837 #endif
 838                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 839         }
 840         nameidone(&nd);
 841
 842         /*
 843          * drop I/O count on covered 'vp' and
 844          * on the device vp if there was one
 845          */
 846         if (devpath && devvp)
 847                 vnode_put(devvp);
 848         vnode_put(vp);
 849
 850         /* Note that we've changed something in the parent directory */
 851         post_event_if_success(pvp, error, NOTE_WRITE);
 852         vnode_put(pvp);
 853
 854         return(error);
 855
 856 out4:
 857         (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
 858         if (device_vnode != NULLVP) {
 859                 vnode_rele(device_vnode);
 860                 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
 861                        ctx);
 862                 did_rele = TRUE;
 863         }
 864         vnode_lock_spin(vp);
 865         vp->v_mountedhere = (mount_t) 0;
 866         vnode_unlock(vp);
 867
 868         if (have_usecount) {
 869                 vnode_rele(vp);
 870         }
 871 out3:
 872         if (devpath && ((uap->flags & MNT_UPDATE) == 0) && (!did_rele))
 873                 vnode_rele(devvp);
 874 out2:
 875         if (devpath && devvp)
 876                 vnode_put(devvp);
 877 out1:
 878         /* Release mnt_rwlock only when it was taken */
 879         if (is_rwlock_locked == TRUE) {
 880                 lck_rw_done(&mp->mnt_rwlock);
 881         }
 882         if (mntalloc) {
 883 #if CONFIG_MACF
 884                 mac_mount_label_destroy(mp);
 885 #endif
 886                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 887         }
 888
 889         if (vfsp_ref) {
 890                 mount_list_lock();
 891                 vfsp->vfc_refcount--;
 892                 mount_list_unlock();
 893         }
 894         vnode_put(vp);
 895         vnode_put(pvp);
 896         nameidone(&nd);
 897
 898         return(error);
 899 }
 900
 901 #ifdef CONFIG_IMGSRC_ACCESS
 902 /*
 903  * Flush in-core data, check for competing mount attempts,
 904  * and set VMOUNT
 905  */
 906 static int
 907 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname)
 908 {
 909         struct vnode_attr va;
 910         int error;
 911
 912         /*
 913          * If the user is not root, ensure that they own the directory
 914          * onto which we are attempting to mount.
 915          */
 916         VATTR_INIT(&va);
 917         VATTR_WANTED(&va, va_uid);
 918         if ((error = vnode_getattr(vp, &va, ctx)) ||
 919             (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 920              (!vfs_context_issuser(ctx)))) {
 921                 error = EPERM;
 922                 goto out;
 923         }
 924
 925         if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
 926                 goto out;
 927
 928         if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
 929                 goto out;
 930
 931         if (vp->v_type != VDIR) {
 932                 error = ENOTDIR;
 933                 goto out;
 934         }
 935
 936         if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
 937                 error = EBUSY;
 938                 goto out;
 939         }
 940
 941 #if CONFIG_MACF
 942         error = mac_mount_check_mount(ctx, vp,
 943             cnp, fsname);
 944         if (error != 0)
 945                 goto out;
 946 #endif
 947
 948         vnode_lock_spin(vp);
 949         SET(vp->v_flag, VMOUNT);
 950         vnode_unlock(vp);
 951
 952 out:
 953         return error;
 954 }
 955
 956 static int
 957 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
 958 {
 959         struct nameidata nd;
 960         vnode_t vp;
 961         mode_t accessmode;
 962         int error;
 963
 964         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
 965         if ( (error = namei(&nd)) )
 966                 return error;
 967
 968         strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
 969         vp = nd.ni_vp;
 970         nameidone(&nd);
 971
 972         if (vp->v_type != VBLK) {
 973                 error = ENOTBLK;
 974                 goto out;
 975         }
 976         if (major(vp->v_rdev) >= nblkdev) {
 977                 error = ENXIO;
 978                 goto out;
 979         }
 980         /*
 981          * If mount by non-root, then verify that user has necessary
 982          * permissions on the device.
 983          */
 984         if (!vfs_context_issuser(ctx)) {
 985                 accessmode = KAUTH_VNODE_READ_DATA;
 986                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 987                         accessmode |= KAUTH_VNODE_WRITE_DATA;
 988                 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0)
 989                         goto out;
 990         }
 991
 992         *devvpp = vp;
 993 out:
 994         if (error) {
 995                 vnode_put(vp);
 996         }
 997
 998         return error;
 999 }
1000
1001 /*
1002  * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1003  * and call checkdirs()
1004  */
1005 static int
1006 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1007 {
1008         int error;
1009
1010         mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1011
1012         vnode_lock_spin(vp);
1013         CLR(vp->v_flag, VMOUNT);
1014         vp->v_mountedhere = mp;
1015         vnode_unlock(vp);
1016
1017         /*
1018          * taking the name_cache_lock exclusively will
1019          * insure that everyone is out of the fast path who
1020          * might be trying to use a now stale copy of
1021          * vp->v_mountedhere->mnt_realrootvp
1022          * bumping mount_generation causes the cached values
1023          * to be invalidated
1024          */
1025         name_cache_lock();
1026         mount_generation++;
1027         name_cache_unlock();
1028
1029         error = vnode_ref(vp);
1030         if (error != 0) {
1031                 goto out;
1032         }
1033
1034         error = checkdirs(vp, ctx);
1035         if (error != 0)  {
1036                 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1037                 vnode_rele(vp);
1038                 goto out;
1039         }
1040
1041 out:
1042         if (error != 0) {
1043                 mp->mnt_vnodecovered = NULLVP;
1044         }
1045         return error;
1046 }
1047
1048 static void
1049 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1050 {
1051         vnode_rele(vp);
1052         vnode_lock_spin(vp);
1053         vp->v_mountedhere = (mount_t)NULL;
1054         vnode_unlock(vp);
1055
1056         mp->mnt_vnodecovered = NULLVP;
1057 }
1058
1059 static int
1060 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1061 {
1062         int error;
1063
1064         /* unmount in progress return error */
1065         mount_lock_spin(mp);
1066         if (mp->mnt_lflag & MNT_LUNMOUNT) {
1067                 mount_unlock(mp);
1068                 return EBUSY;
1069         }
1070         mount_unlock(mp);
1071         lck_rw_lock_exclusive(&mp->mnt_rwlock);
1072
1073         /*
1074          * We only allow the filesystem to be reloaded if it
1075          * is currently mounted read-only.
1076          */
1077         if ((flags & MNT_RELOAD) &&
1078                         ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1079                 error = ENOTSUP;
1080                 goto out;
1081         }
1082
1083         /*
1084          * Only root, or the user that did the original mount is
1085          * permitted to update it.
1086          */
1087         if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1088                         (!vfs_context_issuser(ctx))) {
1089                 error = EPERM;
1090                 goto out;
1091         }
1092 #if CONFIG_MACF
1093         error = mac_mount_check_remount(ctx, mp);
1094         if (error != 0) {
1095                 goto out;
1096         }
1097 #endif
1098
1099 out:
1100         if (error) {
1101                 lck_rw_done(&mp->mnt_rwlock);
1102         }
1103
1104         return error;
1105 }
1106
1107 static void
1108 mount_end_update(mount_t mp)
1109 {
1110         lck_rw_done(&mp->mnt_rwlock);
1111 }
1112
1113 static int
1114 relocate_imageboot_source(vnode_t vp, struct componentname *cnp,
1115                 const char *fsname, vfs_context_t ctx,
1116                 boolean_t is64bit, user_addr_t fsmountargs)
1117 {
1118         int error;
1119         mount_t mp;
1120         boolean_t placed = FALSE;
1121         vnode_t devvp;
1122         struct vfstable *vfsp;
1123         user_addr_t devpath;
1124         char *old_mntonname;
1125
1126         /* If we didn't imageboot, nothing to move */
1127         if (imgsrc_rootvnode == NULLVP) {
1128                 return EINVAL;
1129         }
1130
1131         /* Only root can do this */
1132         if (!vfs_context_issuser(ctx)) {
1133                 return EPERM;
1134         }
1135
1136         error = vnode_get(imgsrc_rootvnode);
1137         if (error != 0) {
1138                 return error;
1139         }
1140
1141         MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1142
1143         /* Can only move once */
1144         mp = vnode_mount(imgsrc_rootvnode);
1145         if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1146                 error = EBUSY;
1147                 goto out0;
1148         }
1149
1150         /* Get exclusive rwlock on mount, authorize update on mp */
1151         error = mount_begin_update(mp , ctx, 0);
1152         if (error != 0) {
1153                 goto out0;
1154         }
1155
1156         /*
1157          * It can only be moved once.  Flag is set under the rwlock,
1158          * so we're now safe to proceed.
1159          */
1160         if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1161                 goto out1;
1162         }
1163
1164         /* Mark covered vnode as mount in progress, authorize placing mount on top */
1165         error = prepare_coveredvp(vp, ctx, cnp, fsname);
1166         if (error != 0) {
1167                 goto out1;
1168         }
1169
1170         /* Sanity check the name caller has provided */
1171         vfsp = mp->mnt_vtable;
1172         if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1173                 error = EINVAL;
1174                 goto out2;
1175         }
1176
1177         /* Check the device vnode and update mount-from name, for local filesystems */
1178         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1179                 if (is64bit) {
1180                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1181                                 goto out2;
1182                         fsmountargs += sizeof(devpath);
1183                 } else {
1184                         user32_addr_t tmp;
1185                         if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1186                                 goto out2;
1187                         /* munge into LP64 addr */
1188                         devpath = CAST_USER_ADDR_T(tmp);
1189                         fsmountargs += sizeof(tmp);
1190                 }
1191
1192                 if (devpath != USER_ADDR_NULL) {
1193                         error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1194                         if (error) {
1195                                 goto out2;
1196                         }
1197
1198                         vnode_put(devvp);
1199                 }
1200         }
1201
1202         /*
1203          * Place mp on top of vnode, ref the vnode,  call checkdirs(),
1204          * and increment the name cache's mount generation
1205          */
1206         error = place_mount_and_checkdirs(mp, vp, ctx);
1207         if (error != 0) {
1208                 goto out2;
1209         }
1210
1211         placed = TRUE;
1212
1213         strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1214         strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1215
1216         /* Forbid future moves */
1217         mount_lock(mp);
1218         mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1219         mount_unlock(mp);
1220
1221         /* Finally, add to mount list, completely ready to go */
1222         error = mount_list_add(mp);
1223         if (error != 0) {
1224                 goto out3;
1225         }
1226
1227         mount_end_update(mp);
1228         vnode_put(imgsrc_rootvnode);
1229         FREE(old_mntonname, M_TEMP);
1230
1231         return 0;
1232 out3:
1233         strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1234
1235         mount_lock(mp);
1236         mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1237         mount_unlock(mp);
1238
1239 out2:
1240         /*
1241          * Placing the mp on the vnode clears VMOUNT,
1242          * so cleanup is different after that point
1243          */
1244         if (placed) {
1245                 /* Rele the vp, clear VMOUNT and v_mountedhere */
1246                 undo_place_on_covered_vp(mp, vp);
1247         } else {
1248                 vnode_lock_spin(vp);
1249                 CLR(vp->v_flag, VMOUNT);
1250                 vnode_unlock(vp);
1251         }
1252 out1:
1253         mount_end_update(mp);
1254
1255 out0:
1256         vnode_put(imgsrc_rootvnode);
1257         FREE(old_mntonname, M_TEMP);
1258         return error;
1259 }
1260
1261 #endif /* CONFIG_IMGSRC_ACCESS */
1262
1263 void
1264 enablequotas(struct mount *mp, vfs_context_t ctx)
1265 {
1266         struct nameidata qnd;
1267         int type;
1268         char qfpath[MAXPATHLEN];
1269         const char *qfname = QUOTAFILENAME;
1270         const char *qfopsname = QUOTAOPSNAME;
1271         const char *qfextension[] = INITQFNAMES;
1272
1273         /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
1274         if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1275                 return;
1276         }
1277         /*
1278          * Enable filesystem disk quotas if necessary.
1279          * We ignore errors as this should not interfere with final mount
1280          */
1281         for (type=0; type < MAXQUOTAS; type++) {
1282                 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1283                 NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(qfpath), ctx);
1284                 if (namei(&qnd) != 0)
1285                         continue;           /* option file to trigger quotas is not present */
1286                 vnode_put(qnd.ni_vp);
1287                 nameidone(&qnd);
1288                 snprintf(qfpath, sizeof(qfpath),  "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1289
1290                 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1291         }
1292         return;
1293 }
1294
1295
1296 static int
1297 checkdirs_callback(proc_t p, void * arg)
1298 {
1299         struct cdirargs * cdrp = (struct cdirargs * )arg;
1300         vnode_t olddp = cdrp->olddp;
1301         vnode_t newdp = cdrp->newdp;
1302         struct filedesc *fdp;
1303         vnode_t tvp;
1304         vnode_t fdp_cvp;
1305         vnode_t fdp_rvp;
1306         int cdir_changed = 0;
1307         int rdir_changed = 0;
1308
1309         /*
1310          * XXX Also needs to iterate each thread in the process to see if it
1311          * XXX is using a per-thread current working directory, and, if so,
1312          * XXX update that as well.
1313          */
1314
1315         proc_fdlock(p);
1316         fdp = p->p_fd;
1317         if (fdp == (struct filedesc *)0) {
1318                 proc_fdunlock(p);
1319                 return(PROC_RETURNED);
1320         }
1321         fdp_cvp = fdp->fd_cdir;
1322         fdp_rvp = fdp->fd_rdir;
1323         proc_fdunlock(p);
1324
1325         if (fdp_cvp == olddp) {
1326                 vnode_ref(newdp);
1327                 tvp = fdp->fd_cdir;
1328                 fdp_cvp = newdp;
1329                 cdir_changed = 1;
1330                 vnode_rele(tvp);
1331         }
1332         if (fdp_rvp == olddp) {
1333                 vnode_ref(newdp);
1334                 tvp = fdp->fd_rdir;
1335                 fdp_rvp = newdp;
1336                 rdir_changed = 1;
1337                 vnode_rele(tvp);
1338         }
1339         if (cdir_changed || rdir_changed) {
1340                 proc_fdlock(p);
1341                 fdp->fd_cdir = fdp_cvp;
1342                 fdp->fd_rdir = fdp_rvp;
1343                 proc_fdunlock(p);
1344         }
1345         return(PROC_RETURNED);
1346 }
1347
1348
1349
1350 /*
1351  * Scan all active processes to see if any of them have a current
1352  * or root directory onto which the new filesystem has just been
1353  * mounted. If so, replace them with the new mount point.
1354  */
1355 static int
1356 checkdirs(vnode_t olddp, vfs_context_t ctx)
1357 {
1358         vnode_t newdp;
1359         vnode_t tvp;
1360         int err;
1361         struct cdirargs cdr;
1362         struct uthread * uth = get_bsdthread_info(current_thread());
1363
1364         if (olddp->v_usecount == 1)
1365                 return(0);
1366         if (uth != (struct uthread *)0)
1367                 uth->uu_notrigger = 1;
1368         err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1369         if (uth != (struct uthread *)0)
1370                 uth->uu_notrigger = 0;
1371
1372         if (err != 0) {
1373 #if DIAGNOSTIC
1374                 panic("mount: lost mount: error %d", err);
1375 #endif
1376                 return(err);
1377         }
1378
1379         cdr.olddp = olddp;
1380         cdr.newdp = newdp;
1381         /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1382         proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1383
1384         if (rootvnode == olddp) {
1385                 vnode_ref(newdp);
1386                 tvp = rootvnode;
1387                 rootvnode = newdp;
1388                 vnode_rele(tvp);
1389         }
1390
1391         vnode_put(newdp);
1392         return(0);
1393 }
1394
1395 /*
1396  * Unmount a file system.
1397  *
1398  * Note: unmount takes a path to the vnode mounted on as argument,
1399  * not special file (as before).
1400  */
1401 /* ARGSUSED */
1402 int
1403 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1404 {
1405         vnode_t vp;
1406         struct mount *mp;
1407         int error;
1408         struct nameidata nd;
1409         vfs_context_t ctx = vfs_context_current();
1410
1411         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1412                 UIO_USERSPACE, uap->path, ctx);
1413         error = namei(&nd);
1414         if (error)
1415                 return (error);
1416         vp = nd.ni_vp;
1417         mp = vp->v_mount;
1418         nameidone(&nd);
1419
1420 #if CONFIG_MACF
1421         error = mac_mount_check_umount(ctx, mp);
1422         if (error != 0) {
1423                 vnode_put(vp);
1424                 return (error);
1425         }
1426 #endif
1427         /*
1428          * Must be the root of the filesystem
1429          */
1430         if ((vp->v_flag & VROOT) == 0) {
1431                 vnode_put(vp);
1432                 return (EINVAL);
1433         }
1434         mount_ref(mp, 0);
1435         vnode_put(vp);
1436         /* safedounmount consumes the mount ref */
1437         return (safedounmount(mp, uap->flags, ctx));
1438 }
1439
1440 int
1441 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1442 {
1443         mount_t mp;
1444
1445         mp = mount_list_lookupby_fsid(fsid, 0, 1);
1446         if (mp == (mount_t)0) {
1447                 return(ENOENT);
1448         }
1449         mount_ref(mp, 0);
1450         mount_iterdrop(mp);
1451         /* safedounmount consumes the mount ref */
1452         return(safedounmount(mp, flags, ctx));
1453 }
1454
1455
1456 /*
1457  * The mount struct comes with a mount ref which will be consumed.
1458  * Do the actual file system unmount, prevent some common foot shooting.
1459  */
1460 int
1461 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1462 {
1463         int error;
1464         proc_t p = vfs_context_proc(ctx);
1465
1466         /*
1467          * Only root, or the user that did the original mount is
1468          * permitted to unmount this filesystem.
1469          */
1470         if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1471             (error = suser(kauth_cred_get(), &p->p_acflag)))
1472                 goto out;
1473
1474         /*
1475          * Don't allow unmounting the root file system.
1476          */
1477         if (mp->mnt_flag & MNT_ROOTFS) {
1478                 error = EBUSY; /* the root is always busy */
1479                 goto out;
1480         }
1481
1482 #ifdef CONFIG_IMGSRC_ACCESS
1483         if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1484                 error = EBUSY;
1485                 goto out;
1486         }
1487 #endif /* CONFIG_IMGSRC_ACCESS */
1488
1489         return (dounmount(mp, flags, 1, ctx));
1490
1491 out:
1492         mount_drop(mp, 0);
1493         return(error);
1494 }
1495
1496 /*
1497  * Do the actual file system unmount.
1498  */
1499 int
1500 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1501 {
1502         vnode_t coveredvp = (vnode_t)0;
1503         int error;
1504         int needwakeup = 0;
1505         int forcedunmount = 0;
1506         int lflags = 0;
1507         struct vnode *devvp = NULLVP;
1508
1509         if (flags & MNT_FORCE)
1510                 forcedunmount = 1;
1511         mount_lock(mp);
1512         /* XXX post jaguar fix LK_DRAIN - then clean this up */
1513         if ((flags & MNT_FORCE)) {
1514                 mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
1515                 mp->mnt_lflag |= MNT_LFORCE;
1516         }
1517         if (mp->mnt_lflag & MNT_LUNMOUNT) {
1518                 mp->mnt_lflag |= MNT_LWAIT;
1519                 if(withref != 0)
1520                         mount_drop(mp, 1);
1521                 msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
1522                 /*
1523                  * The prior unmount attempt has probably succeeded.
1524                  * Do not dereference mp here - returning EBUSY is safest.
1525                  */
1526                 return (EBUSY);
1527         }
1528         mp->mnt_kern_flag |= MNTK_UNMOUNT;
1529         mp->mnt_lflag |= MNT_LUNMOUNT;
1530         mp->mnt_flag &=~ MNT_ASYNC;
1531         /*
1532          * anyone currently in the fast path that
1533          * trips over the cached rootvp will be
1534          * dumped out and forced into the slow path
1535          * to regenerate a new cached value
1536          */
1537         mp->mnt_realrootvp = NULLVP;
1538         mount_unlock(mp);
1539
1540         /*
1541          * taking the name_cache_lock exclusively will
1542          * ensure that everyone is out of the fast path who
1543          * might be trying to use a now stale copy of
1544          * vp->v_mountedhere->mnt_realrootvp
1545          * bumping mount_generation causes the cached values
1546          * to be invalidated
1547          */
1548         name_cache_lock();
1549         mount_generation++;
1550         name_cache_unlock();
1551
1552
1553         lck_rw_lock_exclusive(&mp->mnt_rwlock);
1554         if (withref != 0)
1555                 mount_drop(mp, 0);
1556 #if CONFIG_FSE
1557         fsevent_unmount(mp);  /* has to come first! */
1558 #endif
1559         error = 0;
1560         if (forcedunmount == 0) {
1561                 ubc_umount(mp); /* release cached vnodes */
1562                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1563                         error = VFS_SYNC(mp, MNT_WAIT, ctx);
1564                         if (error) {
1565                                 mount_lock(mp);
1566                                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1567                                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1568                                 mp->mnt_lflag &= ~MNT_LFORCE;
1569                                 goto out;
1570                         }
1571                 }
1572         }
1573
1574         if (forcedunmount)
1575                 lflags |= FORCECLOSE;
1576         error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
1577         if ((forcedunmount == 0) && error) {
1578                 mount_lock(mp);
1579                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1580                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1581                 mp->mnt_lflag &= ~MNT_LFORCE;
1582                 goto out;
1583         }
1584
1585         /* make sure no one is still in the mount iterations or lookup */
1586         mount_iterdrain(mp);
1587
1588         error = VFS_UNMOUNT(mp, flags, ctx);
1589         if (error) {
1590                 mount_iterreset(mp);
1591                 mount_lock(mp);
1592                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1593                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1594                 mp->mnt_lflag &= ~MNT_LFORCE;
1595                 goto out;
1596         }
1597
1598         /* increment the operations count */
1599         if (!error)
1600                 OSAddAtomic(1, &vfs_nummntops);
1601
1602         if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1603                 /* hold an io reference and drop the usecount before close */
1604                 devvp = mp->mnt_devvp;
1605                 vnode_getalways(devvp);
1606                 vnode_rele(devvp);
1607                 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1608                        ctx);
1609                 vnode_clearmountedon(devvp);
1610                 vnode_put(devvp);
1611         }
1612         lck_rw_done(&mp->mnt_rwlock);
1613         mount_list_remove(mp);
1614         lck_rw_lock_exclusive(&mp->mnt_rwlock);
1615
1616         /* mark the mount point hook in the vp but not drop the ref yet */
1617         if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1618                         vnode_getwithref(coveredvp);
1619                         vnode_lock_spin(coveredvp);
1620                         coveredvp->v_mountedhere = (struct mount *)0;
1621                         vnode_unlock(coveredvp);
1622                         vnode_put(coveredvp);
1623         }
1624
1625         mount_list_lock();
1626         mp->mnt_vtable->vfc_refcount--;
1627         mount_list_unlock();
1628
1629         cache_purgevfs(mp);     /* remove cache entries for this file sys */
1630         vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1631         mount_lock(mp);
1632         mp->mnt_lflag |= MNT_LDEAD;
1633
1634         if (mp->mnt_lflag & MNT_LWAIT) {
1635                 /*
1636                  * do the wakeup here
1637                  * in case we block in mount_refdrain
1638                  * which will drop the mount lock
1639                  * and allow anyone blocked in vfs_busy
1640                  * to wakeup and see the LDEAD state
1641                  */
1642                 mp->mnt_lflag &= ~MNT_LWAIT;
1643                 wakeup((caddr_t)mp);
1644         }
1645         mount_refdrain(mp);
1646 out:
1647         if (mp->mnt_lflag & MNT_LWAIT) {
1648                 mp->mnt_lflag &= ~MNT_LWAIT;
1649                 needwakeup = 1;
1650         }
1651         mount_unlock(mp);
1652         lck_rw_done(&mp->mnt_rwlock);
1653
1654         if (needwakeup)
1655                 wakeup((caddr_t)mp);
1656         if (!error) {
1657                 if ((coveredvp != NULLVP)) {
1658                         vnode_t pvp;
1659
1660                         vnode_getwithref(coveredvp);
1661                         pvp = vnode_getparent(coveredvp);
1662                         vnode_rele(coveredvp);
1663                         vnode_lock_spin(coveredvp);
1664                         if(mp->mnt_crossref == 0) {
1665                                 vnode_unlock(coveredvp);
1666                                 mount_lock_destroy(mp);
1667 #if CONFIG_MACF
1668                                 mac_mount_label_destroy(mp);
1669 #endif
1670                                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1671                         }  else {
1672                                 coveredvp->v_lflag |= VL_MOUNTDEAD;
1673                                 vnode_unlock(coveredvp);
1674                         }
1675                         vnode_put(coveredvp);
1676
1677                         if (pvp) {
1678                                 lock_vnode_and_post(pvp, NOTE_WRITE);
1679                                 vnode_put(pvp);
1680                         }
1681                 } else if (mp->mnt_flag & MNT_ROOTFS) {
1682                                 mount_lock_destroy(mp);
1683 #if CONFIG_MACF
1684                                 mac_mount_label_destroy(mp);
1685 #endif
1686                                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1687                 } else
1688                         panic("dounmount: no coveredvp");
1689         }
1690         return (error);
1691 }
1692
1693 void
1694 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
1695 {
1696                 vnode_lock(dp);
1697                 mp->mnt_crossref--;
1698                 if (mp->mnt_crossref < 0)
1699                         panic("mount cross refs -ve");
1700                 if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) {
1701                         dp->v_lflag &= ~VL_MOUNTDEAD;
1702                         if (need_put)
1703                                 vnode_put_locked(dp);
1704                         vnode_unlock(dp);
1705                         mount_lock_destroy(mp);
1706 #if CONFIG_MACF
1707                         mac_mount_label_destroy(mp);
1708 #endif
1709                         FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1710                         return;
1711                 }
1712                 if (need_put)
1713                         vnode_put_locked(dp);
1714                 vnode_unlock(dp);
1715 }
1716
1717
1718 /*
1719  * Sync each mounted filesystem.
1720  */
#if DIAGNOSTIC
/* When non-zero, sync() calls vfs_bufstats() before returning. */
int syncprt = 0;
struct ctldebug debug0 = { "syncprt", &syncprt };
#endif

/* When non-zero, sync() calls vm_countdirtypages(). */
int print_vmpage_stat = 0;
1727
1728 static int
1729 sync_callback(mount_t mp, void * arg)
1730 {
1731         int asyncflag;
1732
1733         if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1734                         asyncflag = mp->mnt_flag & MNT_ASYNC;
1735                         mp->mnt_flag &= ~MNT_ASYNC;
1736                         VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current());
1737                         if (asyncflag)
1738                                 mp->mnt_flag |= MNT_ASYNC;
1739         }
1740         return(VFS_RETURNED);
1741 }
1742
1743
1744 #include <kern/clock.h>
1745
1746 clock_sec_t sync_wait_time = 0;
1747
1748 /* ARGSUSED */
1749 int
1750 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
1751 {
1752         clock_nsec_t nsecs;
1753
1754         vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
1755
1756         {
1757                 static fsid_t fsid = { { 0, 0 } };
1758
1759                 clock_get_calendar_microtime(&sync_wait_time, &nsecs);
1760                 vfs_event_signal(&fsid, VQ_SYNCEVENT, (intptr_t)NULL);
1761                 wakeup((caddr_t)&sync_wait_time);
1762         }
1763
1764         {
1765         if(print_vmpage_stat) {
1766                 vm_countdirtypages();
1767         }
1768         }
1769 #if DIAGNOSTIC
1770         if (syncprt)
1771                 vfs_bufstats();
1772 #endif /* DIAGNOSTIC */
1773         return (0);
1774 }
1775
1776 /*
1777  * Change filesystem quotas.
1778  */
1779 #if QUOTA
1780 static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval);
1781
1782 int
1783 quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval)
1784 {
1785         boolean_t funnel_state;
1786         int error;
1787
1788         funnel_state = thread_funnel_set(kernel_flock, TRUE);
1789         error = quotactl_funneled(p, uap, retval);
1790         thread_funnel_set(kernel_flock, funnel_state);
1791         return(error);
1792 }
1793
1794 static int
1795 quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1796 {
1797         struct mount *mp;
1798         int error, quota_cmd, quota_status;
1799         caddr_t datap;
1800         size_t fnamelen;
1801         struct nameidata nd;
1802         vfs_context_t ctx = vfs_context_current();
1803         struct dqblk my_dqblk;
1804
1805         AUDIT_ARG(uid, uap->uid);
1806         AUDIT_ARG(cmd, uap->cmd);
1807         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1808                 UIO_USERSPACE, uap->path, ctx);
1809         error = namei(&nd);
1810         if (error)
1811                 return (error);
1812         mp = nd.ni_vp->v_mount;
1813         vnode_put(nd.ni_vp);
1814         nameidone(&nd);
1815
1816         /* copyin any data we will need for downstream code */
1817         quota_cmd = uap->cmd >> SUBCMDSHIFT;
1818
1819         switch (quota_cmd) {
1820         case Q_QUOTAON:
1821                 /* uap->arg specifies a file from which to take the quotas */
1822                 fnamelen = MAXPATHLEN;
1823                 datap = kalloc(MAXPATHLEN);
1824                 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
1825                 break;
1826         case Q_GETQUOTA:
1827                 /* uap->arg is a pointer to a dqblk structure. */
1828                 datap = (caddr_t) &my_dqblk;
1829                 break;
1830         case Q_SETQUOTA:
1831         case Q_SETUSE:
1832                 /* uap->arg is a pointer to a dqblk structure. */
1833                 datap = (caddr_t) &my_dqblk;
1834                 if (proc_is64bit(p)) {
1835                         struct user_dqblk       my_dqblk64;
1836                         error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
1837                         if (error == 0) {
1838                                 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
1839                         }
1840                 }
1841                 else {
1842                         error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
1843                 }
1844                 break;
1845         case Q_QUOTASTAT:
1846                 /* uap->arg is a pointer to an integer */
1847                 datap = (caddr_t) &quota_status;
1848                 break;
1849         default:
1850                 datap = NULL;
1851                 break;
1852         } /* switch */
1853
1854         if (error == 0) {
1855                 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
1856         }
1857
1858         switch (quota_cmd) {
1859         case Q_QUOTAON:
1860                 if (datap != NULL)
1861                         kfree(datap, MAXPATHLEN);
1862                 break;
1863         case Q_GETQUOTA:
1864                 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
1865                 if (error == 0) {
1866                         if (proc_is64bit(p)) {
1867                                 struct user_dqblk       my_dqblk64;
1868                                 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
1869                                 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
1870                         }
1871                         else {
1872                                 error = copyout(datap, uap->arg, sizeof (struct dqblk));
1873                         }
1874                 }
1875                 break;
1876         case Q_QUOTASTAT:
1877                 /* uap->arg is a pointer to an integer */
1878                 if (error == 0) {
1879                         error = copyout(datap, uap->arg, sizeof(quota_status));
1880                 }
1881                 break;
1882         default:
1883                 break;
1884         } /* switch */
1885
1886         return (error);
1887 }
1888 #else
1889 int
1890 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
1891 {
1892         return (EOPNOTSUPP);
1893 }
1894 #endif /* QUOTA */
1895
1896 /*
1897  * Get filesystem statistics.
1898  *
1899  * Returns:     0                       Success
1900  *      namei:???
1901  *      vfs_update_vfsstat:???
1902  *      munge_statfs:EFAULT
1903  */
1904 /* ARGSUSED */
1905 int
1906 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1907 {
1908         struct mount *mp;
1909         struct vfsstatfs *sp;
1910         int error;
1911         struct nameidata nd;
1912         vfs_context_t ctx = vfs_context_current();
1913         vnode_t vp;
1914
1915         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1916                 UIO_USERSPACE, uap->path, ctx);
1917         error = namei(&nd);
1918         if (error)
1919                 return (error);
1920         vp = nd.ni_vp;
1921         mp = vp->v_mount;
1922         sp = &mp->mnt_vfsstat;
1923         nameidone(&nd);
1924
1925         error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
1926         vnode_put(vp);
1927         if (error != 0)
1928                 return (error);
1929
1930         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1931         return (error);
1932 }
1933
1934 /*
1935  * Get filesystem statistics.
1936  */
1937 /* ARGSUSED */
1938 int
1939 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1940 {
1941         vnode_t vp;
1942         struct mount *mp;
1943         struct vfsstatfs *sp;
1944         int error;
1945
1946         AUDIT_ARG(fd, uap->fd);
1947
1948         if ( (error = file_vnode(uap->fd, &vp)) )
1949                 return (error);
1950
1951         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1952
1953         mp = vp->v_mount;
1954         if (!mp) {
1955                 file_drop(uap->fd);
1956                 return (EBADF);
1957         }
1958         sp = &mp->mnt_vfsstat;
1959         if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
1960                 file_drop(uap->fd);
1961                 return (error);
1962         }
1963         file_drop(uap->fd);
1964
1965         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1966
1967         return (error);
1968 }
1969
1970 /*
1971  * Common routine to handle copying of statfs64 data to user space
1972  */
1973 static int
1974 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
1975 {
1976         int error;
1977         struct statfs64 sfs;
1978
1979         bzero(&sfs, sizeof(sfs));
1980
1981         sfs.f_bsize = sfsp->f_bsize;
1982         sfs.f_iosize = (int32_t)sfsp->f_iosize;
1983         sfs.f_blocks = sfsp->f_blocks;
1984         sfs.f_bfree = sfsp->f_bfree;
1985         sfs.f_bavail = sfsp->f_bavail;
1986         sfs.f_files = sfsp->f_files;
1987         sfs.f_ffree = sfsp->f_ffree;
1988         sfs.f_fsid = sfsp->f_fsid;
1989         sfs.f_owner = sfsp->f_owner;
1990         sfs.f_type = mp->mnt_vtable->vfc_typenum;
1991         sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1992         sfs.f_fssubtype = sfsp->f_fssubtype;
1993         strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
1994         strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
1995         strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
1996
1997         error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
1998
1999         return(error);
2000 }
2001
2002 /*
2003  * Get file system statistics in 64-bit mode
2004  */
2005 int
2006 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2007 {
2008         struct mount *mp;
2009         struct vfsstatfs *sp;
2010         int error;
2011         struct nameidata nd;
2012         vfs_context_t ctxp = vfs_context_current();
2013         vnode_t vp;
2014
2015         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2016                 UIO_USERSPACE, uap->path, ctxp);
2017         error = namei(&nd);
2018         if (error)
2019                 return (error);
2020         vp = nd.ni_vp;
2021         mp = vp->v_mount;
2022         sp = &mp->mnt_vfsstat;
2023         nameidone(&nd);
2024
2025         error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2026         vnode_put(vp);
2027         if (error != 0)
2028                 return (error);
2029
2030         error = statfs64_common(mp, sp, uap->buf);
2031
2032         return (error);
2033 }
2034
2035 /*
2036  * Get file system statistics in 64-bit mode
2037  */
2038 int
2039 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2040 {
2041         struct vnode *vp;
2042         struct mount *mp;
2043         struct vfsstatfs *sp;
2044         int error;
2045
2046         AUDIT_ARG(fd, uap->fd);
2047
2048         if ( (error = file_vnode(uap->fd, &vp)) )
2049                 return (error);
2050
2051         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2052
2053         mp = vp->v_mount;
2054         if (!mp) {
2055                 file_drop(uap->fd);
2056                 return (EBADF);
2057         }
2058         sp = &mp->mnt_vfsstat;
2059         if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2060                 file_drop(uap->fd);
2061                 return (error);
2062         }
2063         file_drop(uap->fd);
2064
2065         error = statfs64_common(mp, sp, uap->buf);
2066
2067         return (error);
2068 }
2069
2070 struct getfsstat_struct {
2071         user_addr_t     sfsp;
2072         user_addr_t     *mp;
2073         int             count;
2074         int             maxcount;
2075         int             flags;
2076         int             error;
2077 };
2078
2079
2080 static int
2081 getfsstat_callback(mount_t mp, void * arg)
2082 {
2083
2084         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2085         struct vfsstatfs *sp;
2086         int error, my_size;
2087         vfs_context_t ctx = vfs_context_current();
2088
2089         if (fstp->sfsp && fstp->count < fstp->maxcount) {
2090                 sp = &mp->mnt_vfsstat;
2091                 /*
2092                  * If MNT_NOWAIT is specified, do not refresh the
2093                  * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2094                  */
2095                 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2096                         (error = vfs_update_vfsstat(mp, ctx,
2097                             VFS_USER_EVENT))) {
2098                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2099                         return(VFS_RETURNED);
2100                 }
2101
2102                 /*
2103                  * Need to handle LP64 version of struct statfs
2104                  */
2105                 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2106                 if (error) {
2107                         fstp->error = error;
2108                         return(VFS_RETURNED_DONE);
2109                 }
2110                 fstp->sfsp += my_size;
2111
2112                 if (fstp->mp) {
2113                         error = mac_mount_label_get(mp, *fstp->mp);
2114                         if (error) {
2115                                 fstp->error = error;
2116                                 return(VFS_RETURNED_DONE);
2117                         }
2118                         fstp->mp++;
2119                 }
2120         }
2121         fstp->count++;
2122         return(VFS_RETURNED);
2123 }
2124
2125 /*
2126  * Get statistics on all filesystems.
2127  */
2128 int
2129 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2130 {
2131         struct __mac_getfsstat_args muap;
2132
2133         muap.buf = uap->buf;
2134         muap.bufsize = uap->bufsize;
2135         muap.mac = USER_ADDR_NULL;
2136         muap.macsize = 0;
2137         muap.flags = uap->flags;
2138
2139         return (__mac_getfsstat(p, &muap, retval));
2140 }
2141
2142 /*
2143  * __mac_getfsstat: Get MAC-related file system statistics
2144  *
2145  * Parameters:    p                        (ignored)
2146  *                uap                      User argument descriptor (see below)
2147  *                retval                   Count of file system statistics (N stats)
2148  *
2149  * Indirect:      uap->bufsize             Buffer size
2150  *                uap->macsize             MAC info size
2151  *                uap->buf                 Buffer where information will be returned
2152  *                uap->mac                 MAC info
2153  *                uap->flags               File system flags
2154  *
2155  *
2156  * Returns:        0                       Success
2157  *                !0                       Not success
2158  *
2159  */
2160 int
2161 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2162 {
2163         user_addr_t sfsp;
2164         user_addr_t *mp;
2165         size_t count, maxcount, bufsize, macsize;
2166         struct getfsstat_struct fst;
2167
2168         bufsize = (size_t) uap->bufsize;
2169         macsize = (size_t) uap->macsize;
2170
2171         if (IS_64BIT_PROCESS(p)) {
2172                 maxcount = bufsize / sizeof(struct user64_statfs);
2173         }
2174         else {
2175                 maxcount = bufsize / sizeof(struct user32_statfs);
2176         }
2177         sfsp = uap->buf;
2178         count = 0;
2179
2180         mp = NULL;
2181
2182 #if CONFIG_MACF
2183         if (uap->mac != USER_ADDR_NULL) {
2184                 u_int32_t *mp0;
2185                 int error;
2186                 unsigned int i;
2187
2188                 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2189                 if (count != maxcount)
2190                         return (EINVAL);
2191
2192                 /* Copy in the array */
2193                 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2194                 if (mp0 == NULL) {
2195                         return (ENOMEM);
2196                 }
2197
2198                 error = copyin(uap->mac, mp0, macsize);
2199                 if (error) {
2200                         FREE(mp0, M_MACTEMP);
2201                         return (error);
2202                 }
2203
2204                 /* Normalize to an array of user_addr_t */
2205                 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2206                 if (mp == NULL) {
2207                         FREE(mp0, M_MACTEMP);
2208                         return (ENOMEM);
2209                 }
2210
2211                 for (i = 0; i < count; i++) {
2212                         if (IS_64BIT_PROCESS(p))
2213                                 mp[i] = ((user_addr_t *)mp0)[i];
2214                         else
2215                                 mp[i] = (user_addr_t)mp0[i];
2216                 }
2217                 FREE(mp0, M_MACTEMP);
2218         }
2219 #endif
2220
2221
2222         fst.sfsp = sfsp;
2223         fst.mp = mp;
2224         fst.flags = uap->flags;
2225         fst.count = 0;
2226         fst.error = 0;
2227         fst.maxcount = maxcount;
2228
2229
2230         vfs_iterate(0, getfsstat_callback, &fst);
2231
2232         if (mp)
2233                 FREE(mp, M_MACTEMP);
2234
2235         if (fst.error ) {
2236                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2237                 return(fst.error);
2238         }
2239
2240         if (fst.sfsp && fst.count > fst.maxcount)
2241                 *retval = fst.maxcount;
2242         else
2243                 *retval = fst.count;
2244         return (0);
2245 }
2246
2247 static int
2248 getfsstat64_callback(mount_t mp, void * arg)
2249 {
2250         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2251         struct vfsstatfs *sp;
2252         int error;
2253
2254         if (fstp->sfsp && fstp->count < fstp->maxcount) {
2255                 sp = &mp->mnt_vfsstat;
2256                 /*
2257                  * If MNT_NOWAIT is specified, do not refresh the fsstat
2258                  * cache. MNT_WAIT overrides MNT_NOWAIT.
2259                  *
2260                  * We treat MNT_DWAIT as MNT_WAIT for all instances of
2261                  * getfsstat, since the constants are out of the same
2262                  * namespace.
2263                  */
2264                 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2265                      (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2266                     (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2267                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2268                         return(VFS_RETURNED);
2269                 }
2270
2271                 error = statfs64_common(mp, sp, fstp->sfsp);
2272                 if (error) {
2273                         fstp->error = error;
2274                         return(VFS_RETURNED_DONE);
2275                 }
2276                 fstp->sfsp += sizeof(struct statfs64);
2277         }
2278         fstp->count++;
2279         return(VFS_RETURNED);
2280 }
2281
2282 /*
2283  * Get statistics on all file systems in 64 bit mode.
2284  */
2285 int
2286 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2287 {
2288         user_addr_t sfsp;
2289         int count, maxcount;
2290         struct getfsstat_struct fst;
2291
2292         maxcount = uap->bufsize / sizeof(struct statfs64);
2293
2294         sfsp = uap->buf;
2295         count = 0;
2296
2297         fst.sfsp = sfsp;
2298         fst.flags = uap->flags;
2299         fst.count = 0;
2300         fst.error = 0;
2301         fst.maxcount = maxcount;
2302
2303         vfs_iterate(0, getfsstat64_callback, &fst);
2304
2305         if (fst.error ) {
2306                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2307                 return(fst.error);
2308         }
2309
2310         if (fst.sfsp && fst.count > fst.maxcount)
2311                 *retval = fst.maxcount;
2312         else
2313                 *retval = fst.count;
2314
2315         return (0);
2316 }
2317
2318 /*
2319  * Change current working directory to a given file descriptor.
2320  */
2321 /* ARGSUSED */
2322 static int
2323 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
2324 {
2325         struct filedesc *fdp = p->p_fd;
2326         vnode_t vp;
2327         vnode_t tdp;
2328         vnode_t tvp;
2329         struct mount *mp;
2330         int error;
2331         vfs_context_t ctx = vfs_context_current();
2332
2333         AUDIT_ARG(fd, uap->fd);
2334         if (per_thread && uap->fd == -1) {
2335                 /*
2336                  * Switching back from per-thread to per process CWD; verify we
2337                  * in fact have one before proceeding.  The only success case
2338                  * for this code path is to return 0 preemptively after zapping
2339                  * the thread structure contents.
2340                  */
2341                 thread_t th = vfs_context_thread(ctx);
2342                 if (th) {
2343                         uthread_t uth = get_bsdthread_info(th);
2344                         tvp = uth->uu_cdir;
2345                         uth->uu_cdir = NULLVP;
2346                         if (tvp != NULLVP) {
2347                                 vnode_rele(tvp);
2348                                 return (0);
2349                         }
2350                 }
2351                 return (EBADF);
2352         }
2353
2354         if ( (error = file_vnode(uap->fd, &vp)) )
2355                 return(error);
2356         if ( (error = vnode_getwithref(vp)) ) {
2357                 file_drop(uap->fd);
2358                 return(error);
2359         }
2360
2361         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2362
2363         if (vp->v_type != VDIR) {
2364                 error = ENOTDIR;
2365                 goto out;
2366         }
2367
2368 #if CONFIG_MACF
2369         error = mac_vnode_check_chdir(ctx, vp);
2370         if (error)
2371                 goto out;
2372 #endif
2373         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2374         if (error)
2375                 goto out;
2376
2377         while (!error && (mp = vp->v_mountedhere) != NULL) {
2378                 if (vfs_busy(mp, LK_NOWAIT)) {
2379                         error = EACCES;
2380                         goto out;
2381                 }
2382                 error = VFS_ROOT(mp, &tdp, ctx);
2383                 vfs_unbusy(mp);
2384                 if (error)
2385                         break;
2386                 vnode_put(vp);
2387                 vp = tdp;
2388         }
2389         if (error)
2390                 goto out;
2391         if ( (error = vnode_ref(vp)) )
2392                 goto out;
2393         vnode_put(vp);
2394
2395         if (per_thread) {
2396                 thread_t th = vfs_context_thread(ctx);
2397                 if (th) {
2398                         uthread_t uth = get_bsdthread_info(th);
2399                         tvp = uth->uu_cdir;
2400                         uth->uu_cdir = vp;
2401                         OSBitOrAtomic(P_THCWD, &p->p_flag);
2402                 } else {
2403                         vnode_rele(vp);
2404                         return (ENOENT);
2405                 }
2406         } else {
2407                 proc_fdlock(p);
2408                 tvp = fdp->fd_cdir;
2409                 fdp->fd_cdir = vp;
2410                 proc_fdunlock(p);
2411         }
2412
2413         if (tvp)
2414                 vnode_rele(tvp);
2415         file_drop(uap->fd);
2416
2417         return (0);
2418 out:
2419         vnode_put(vp);
2420         file_drop(uap->fd);
2421
2422         return(error);
2423 }
2424
2425 int
2426 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2427 {
2428         return common_fchdir(p, uap, 0);
2429 }
2430
2431 int
2432 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2433 {
2434         return common_fchdir(p, (void *)uap, 1);
2435 }
2436
2437 /*
2438  * Change current working directory (".").
2439  *
2440  * Returns:     0                       Success
2441  *      change_dir:ENOTDIR
2442  *      change_dir:???
2443  *      vnode_ref:ENOENT                No such file or directory
2444  */
2445 /* ARGSUSED */
2446 static int
2447 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
2448 {
2449         struct filedesc *fdp = p->p_fd;
2450         int error;
2451         struct nameidata nd;
2452         vnode_t tvp;
2453         vfs_context_t ctx = vfs_context_current();
2454
2455         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2456                 UIO_USERSPACE, uap->path, ctx);
2457         error = change_dir(&nd, ctx);
2458         if (error)
2459                 return (error);
2460         if ( (error = vnode_ref(nd.ni_vp)) ) {
2461                 vnode_put(nd.ni_vp);
2462                 return (error);
2463         }
2464         /*
2465          * drop the iocount we picked up in change_dir
2466          */
2467         vnode_put(nd.ni_vp);
2468
2469         if (per_thread) {
2470                 thread_t th = vfs_context_thread(ctx);
2471                 if (th) {
2472                         uthread_t uth = get_bsdthread_info(th);
2473                         tvp = uth->uu_cdir;
2474                         uth->uu_cdir = nd.ni_vp;
2475                         OSBitOrAtomic(P_THCWD, &p->p_flag);
2476                 } else {
2477                         vnode_rele(nd.ni_vp);
2478                         return (ENOENT);
2479                 }
2480         } else {
2481                 proc_fdlock(p);
2482                 tvp = fdp->fd_cdir;
2483                 fdp->fd_cdir = nd.ni_vp;
2484                 proc_fdunlock(p);
2485         }
2486
2487         if (tvp)
2488                 vnode_rele(tvp);
2489
2490         return (0);
2491 }
2492
2493
2494 /*
2495  * chdir
2496  *
2497  * Change current working directory (".") for the entire process
2498  *
2499  * Parameters:  p       Process requesting the call
2500  *              uap     User argument descriptor (see below)
2501  *              retval  (ignored)
2502  *
2503  * Indirect parameters: uap->path       Directory path
2504  *
2505  * Returns:     0                       Success
2506  *              common_chdir: ENOTDIR
2507  *              common_chdir: ENOENT    No such file or directory
2508  *              common_chdir: ???
2509  *
2510  */
2511 int
2512 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2513 {
2514         return common_chdir(p, (void *)uap, 0);
2515 }
2516
2517 /*
2518  * __pthread_chdir
2519  *
2520  * Change current working directory (".") for a single thread
2521  *
2522  * Parameters:  p       Process requesting the call
2523  *              uap     User argument descriptor (see below)
2524  *              retval  (ignored)
2525  *
2526  * Indirect parameters: uap->path       Directory path
2527  *
2528  * Returns:     0                       Success
2529  *              common_chdir: ENOTDIR
2530  *              common_chdir: ENOENT    No such file or directory
2531  *              common_chdir: ???
2532  *
2533  */
2534 int
2535 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2536 {
2537         return common_chdir(p, (void *)uap, 1);
2538 }
2539
2540
2541 /*
2542  * Change notion of root (``/'') directory.
2543  */
2544 /* ARGSUSED */
2545 int
2546 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
2547 {
2548         struct filedesc *fdp = p->p_fd;
2549         int error;
2550         struct nameidata nd;
2551         vnode_t tvp;
2552         vfs_context_t ctx = vfs_context_current();
2553
2554         if ((error = suser(kauth_cred_get(), &p->p_acflag)))
2555                 return (error);
2556
2557         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2558                 UIO_USERSPACE, uap->path, ctx);
2559         error = change_dir(&nd, ctx);
2560         if (error)
2561                 return (error);
2562
2563 #if CONFIG_MACF
2564         error = mac_vnode_check_chroot(ctx, nd.ni_vp,
2565             &nd.ni_cnd);
2566         if (error) {
2567                 vnode_put(nd.ni_vp);
2568                 return (error);
2569         }
2570 #endif
2571
2572         if ( (error = vnode_ref(nd.ni_vp)) ) {
2573                 vnode_put(nd.ni_vp);
2574                 return (error);
2575         }
2576         vnode_put(nd.ni_vp);
2577
2578         proc_fdlock(p);
2579         tvp = fdp->fd_rdir;
2580         fdp->fd_rdir = nd.ni_vp;
2581         fdp->fd_flags |= FD_CHROOT;
2582         proc_fdunlock(p);
2583
2584         if (tvp != NULL)
2585                 vnode_rele(tvp);
2586
2587         return (0);
2588 }
2589
2590 /*
2591  * Common routine for chroot and chdir.
2592  *
2593  * Returns:     0                       Success
2594  *              ENOTDIR                 Not a directory
2595  *              namei:???               [anything namei can return]
2596  *              vnode_authorize:???     [anything vnode_authorize can return]
2597  */
2598 static int
2599 change_dir(struct nameidata *ndp, vfs_context_t ctx)
2600 {
2601         vnode_t vp;
2602         int error;
2603
2604         if ((error = namei(ndp)))
2605                 return (error);
2606         nameidone(ndp);
2607         vp = ndp->ni_vp;
2608
2609         if (vp->v_type != VDIR) {
2610                 vnode_put(vp);
2611                 return (ENOTDIR);
2612         }
2613
2614 #if CONFIG_MACF
2615         error = mac_vnode_check_chdir(ctx, vp);
2616         if (error) {
2617                 vnode_put(vp);
2618                 return (error);
2619         }
2620 #endif
2621
2622         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2623         if (error) {
2624                 vnode_put(vp);
2625                 return (error);
2626         }
2627
2628         return (error);
2629 }
2630
2631 /*
2632  * Check permissions, allocate an open file structure,
2633  * and call the device open routine if any.
2634  *
2635  * Returns:     0                       Success
2636  *              EINVAL
2637  *              EINTR
2638  *      falloc:ENFILE
2639  *      falloc:EMFILE
2640  *      falloc:ENOMEM
2641  *      vn_open_auth:???
2642  *      dupfdopen:???
2643  *      VNOP_ADVLOCK:???
2644  *      vnode_setsize:???
2645  *
2646  * XXX Need to implement uid, gid
2647  */
2648 int
2649 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, int32_t *retval)
2650 {
2651         proc_t p = vfs_context_proc(ctx);
2652         uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2653         struct filedesc *fdp = p->p_fd;
2654         struct fileproc *fp;
2655         vnode_t vp;
2656         int flags, oflags;
2657         struct fileproc *nfp;
2658         int type, indx, error;
2659         struct flock lf;
2660         int no_controlling_tty = 0;
2661         int deny_controlling_tty = 0;
2662         struct session *sessp = SESSION_NULL;
2663         struct vfs_context context = *vfs_context_current();    /* local copy */
2664
2665         oflags = uflags;
2666
2667         if ((oflags & O_ACCMODE) == O_ACCMODE)
2668                 return(EINVAL);
2669         flags = FFLAGS(uflags);
2670
2671         AUDIT_ARG(fflags, oflags);
2672         AUDIT_ARG(mode, vap->va_mode);
2673
2674         if ( (error = falloc(p, &nfp, &indx, ctx)) ) {
2675                 return (error);
2676         }
2677         fp = nfp;
2678         uu->uu_dupfd = -indx - 1;
2679
2680         if (!(p->p_flag & P_CONTROLT)) {
2681                 sessp = proc_session(p);
2682                 no_controlling_tty = 1;
2683                 /*
2684                  * If conditions would warrant getting a controlling tty if
2685                  * the device being opened is a tty (see ttyopen in tty.c),
2686                  * but the open flags deny it, set a flag in the session to
2687                  * prevent it.
2688                  */
2689                 if (SESS_LEADER(p, sessp) &&
2690                     sessp->s_ttyvp == NULL &&
2691                     (flags & O_NOCTTY)) {
2692                         session_lock(sessp);
2693                         sessp->s_flags |= S_NOCTTY;
2694                         session_unlock(sessp);
2695                         deny_controlling_tty = 1;
2696                 }
2697         }
2698
2699         if ((error = vn_open_auth(ndp, &flags, vap))) {
2700                 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){        /* XXX from fdopen */
2701                         if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) {
2702                                 fp_drop(p, indx, NULL, 0);
2703                                 *retval = indx;
2704                                 if (deny_controlling_tty) {
2705                                         session_lock(sessp);
2706                                         sessp->s_flags &= ~S_NOCTTY;
2707                                         session_unlock(sessp);
2708                                 }
2709                                 if (sessp != SESSION_NULL)
2710                                         session_rele(sessp);
2711                                 return (0);
2712                         }
2713                 }
2714                 if (error == ERESTART)
2715                         error = EINTR;
2716                 fp_free(p, indx, fp);
2717
2718                 if (deny_controlling_tty) {
2719                         session_lock(sessp);
2720                         sessp->s_flags &= ~S_NOCTTY;
2721                         session_unlock(sessp);
2722                 }
2723                 if (sessp != SESSION_NULL)
2724                         session_rele(sessp);
2725                 return (error);
2726         }
2727         uu->uu_dupfd = 0;
2728         vp = ndp->ni_vp;
2729
2730         fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
2731         fp->f_fglob->fg_type = DTYPE_VNODE;
2732         fp->f_fglob->fg_ops = &vnops;
2733         fp->f_fglob->fg_data = (caddr_t)vp;
2734
2735         if (flags & (O_EXLOCK | O_SHLOCK)) {
2736                 lf.l_whence = SEEK_SET;
2737                 lf.l_start = 0;
2738                 lf.l_len = 0;
2739                 if (flags & O_EXLOCK)
2740                         lf.l_type = F_WRLCK;
2741                 else
2742                         lf.l_type = F_RDLCK;
2743                 type = F_FLOCK;
2744                 if ((flags & FNONBLOCK) == 0)
2745                         type |= F_WAIT;
2746 #if CONFIG_MACF
2747                 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
2748                     F_SETLK, &lf);
2749                 if (error)
2750                         goto bad;
2751 #endif
2752                 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx)))
2753                         goto bad;
2754                 fp->f_fglob->fg_flag |= FHASLOCK;
2755         }
2756
2757         /* try to truncate by setting the size attribute */
2758         if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
2759                 goto bad;
2760
2761         /*
2762          * If the open flags denied the acquisition of a controlling tty,
2763          * clear the flag in the session structure that prevented the lower
2764          * level code from assigning one.
2765          */
2766         if (deny_controlling_tty) {
2767                 session_lock(sessp);
2768                 sessp->s_flags &= ~S_NOCTTY;
2769                 session_unlock(sessp);
2770         }
2771
2772         /*
2773          * If a controlling tty was set by the tty line discipline, then we
2774          * want to set the vp of the tty into the session structure.  We have
2775          * a race here because we can't get to the vp for the tp in ttyopen,
2776          * because it's not passed as a parameter in the open path.
2777          */
2778         if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
2779                 vnode_t ttyvp;
2780                 vnode_ref(vp);
2781                 session_lock(sessp);
2782                 ttyvp = sessp->s_ttyvp;
2783                 sessp->s_ttyvp = vp;
2784                 sessp->s_ttyvid = vnode_vid(vp);
2785                 session_unlock(sessp);
2786                 if (ttyvp != NULLVP)
2787                         vnode_rele(ttyvp);
2788         }
2789
2790         vnode_put(vp);
2791
2792         proc_fdlock(p);
2793         procfdtbl_releasefd(p, indx, NULL);
2794         fp_drop(p, indx, fp, 1);
2795         proc_fdunlock(p);
2796
2797         *retval = indx;
2798
2799         if (sessp != SESSION_NULL)
2800                 session_rele(sessp);
2801         return (0);
2802 bad:
2803         if (deny_controlling_tty) {
2804                 session_lock(sessp);
2805                 sessp->s_flags &= ~S_NOCTTY;
2806                 session_unlock(sessp);
2807         }
2808         if (sessp != SESSION_NULL)
2809                 session_rele(sessp);
2810
2811         /* Modify local copy (to not damage thread copy) */
2812         context.vc_ucred = fp->f_fglob->fg_cred;
2813
2814         vn_close(vp, fp->f_fglob->fg_flag, &context);
2815         vnode_put(vp);
2816         fp_free(p, indx, fp);
2817
2818         return (error);
2819
2820 }
2821
2822 /*
2823  * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
2824  *
2825  * Parameters:  p                       Process requesting the open
2826  *              uap                     User argument descriptor (see below)
2827  *              retval                  Pointer to an area to receive the
2828  *                                      return calue from the system call
2829  *
2830  * Indirect:    uap->path               Path to open (same as 'open')
2831  *              uap->flags              Flags to open (same as 'open'
2832  *              uap->uid                UID to set, if creating
2833  *              uap->gid                GID to set, if creating
2834  *              uap->mode               File mode, if creating (same as 'open')
2835  *              uap->xsecurity          ACL to set, if creating
2836  *
2837  * Returns:     0                       Success
2838  *              !0                      errno value
2839  *
2840  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
2841  *
2842  * XXX:         We should enummerate the possible errno values here, and where
2843  *              in the code they originated.
2844  */
2845 int
2846 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
2847 {
2848         struct filedesc *fdp = p->p_fd;
2849         int ciferror;
2850         kauth_filesec_t xsecdst;
2851         struct vnode_attr va;
2852         struct nameidata nd;
2853         int cmode;
2854
2855         AUDIT_ARG(owner, uap->uid, uap->gid);
2856
2857         xsecdst = NULL;
2858         if ((uap->xsecurity != USER_ADDR_NULL) &&
2859             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
2860                 return ciferror;
2861
2862         VATTR_INIT(&va);
2863         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2864         VATTR_SET(&va, va_mode, cmode);
2865         if (uap->uid != KAUTH_UID_NONE)
2866                 VATTR_SET(&va, va_uid, uap->uid);
2867         if (uap->gid != KAUTH_GID_NONE)
2868                 VATTR_SET(&va, va_gid, uap->gid);
2869         if (xsecdst != NULL)
2870                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2871
2872         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2873
2874         ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
2875         if (xsecdst != NULL)
2876                 kauth_filesec_free(xsecdst);
2877
2878         return ciferror;
2879 }
2880
2881 int
2882 open(proc_t p, struct open_args *uap, int32_t *retval)
2883 {
2884         __pthread_testcancel(1);
2885         return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
2886 }
2887
2888 int
2889 open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
2890 {
2891         struct filedesc *fdp = p->p_fd;
2892         struct vnode_attr va;
2893         struct nameidata nd;
2894         int cmode;
2895
2896         VATTR_INIT(&va);
2897         /* Mask off all but regular access permissions */
2898         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2899         VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2900
2901         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2902
2903         return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
2904 }
2905
2906
2907 /*
2908  * Create a special file.
2909  */
2910 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
2911
2912 int
2913 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
2914 {
2915         struct vnode_attr va;
2916         vfs_context_t ctx = vfs_context_current();
2917         int error;
2918         int whiteout = 0;
2919         struct nameidata nd;
2920         vnode_t vp, dvp;
2921
2922         VATTR_INIT(&va);
2923         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2924         VATTR_SET(&va, va_rdev, uap->dev);
2925
2926         /* If it's a mknod() of a FIFO, call mkfifo1() instead */
2927         if ((uap->mode & S_IFMT) == S_IFIFO)
2928                 return(mkfifo1(ctx, uap->path, &va));
2929
2930         AUDIT_ARG(mode, uap->mode);
2931         AUDIT_ARG(value32, uap->dev);
2932
2933         if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
2934                 return (error);
2935         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2936                 UIO_USERSPACE, uap->path, ctx);
2937         error = namei(&nd);
2938         if (error)
2939                 return (error);
2940         dvp = nd.ni_dvp;
2941         vp = nd.ni_vp;
2942
2943         if (vp != NULL) {
2944                 error = EEXIST;
2945                 goto out;
2946         }
2947
2948         switch (uap->mode & S_IFMT) {
2949         case S_IFMT:    /* used by badsect to flag bad sectors */
2950                 VATTR_SET(&va, va_type, VBAD);
2951                 break;
2952         case S_IFCHR:
2953                 VATTR_SET(&va, va_type, VCHR);
2954                 break;
2955         case S_IFBLK:
2956                 VATTR_SET(&va, va_type, VBLK);
2957                 break;
2958         case S_IFWHT:
2959                 whiteout = 1;
2960                 break;
2961         default:
2962                 error = EINVAL;
2963                 goto out;
2964         }
2965
2966 #if CONFIG_MACF
2967         if (!whiteout) {
2968                 error = mac_vnode_check_create(ctx,
2969                     nd.ni_dvp, &nd.ni_cnd, &va);
2970                 if (error)
2971                         goto out;
2972         }
2973 #endif
2974
2975         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2976                 goto out;
2977
2978         if (whiteout) {
2979                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, ctx);
2980         } else {
2981                 error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
2982         }
2983         if (error)
2984                 goto out;
2985
2986         if (vp) {
2987                 int     update_flags = 0;
2988
2989                 // Make sure the name & parent pointers are hooked up
2990                 if (vp->v_name == NULL)
2991                         update_flags |= VNODE_UPDATE_NAME;
2992                 if (vp->v_parent == NULLVP)
2993                         update_flags |= VNODE_UPDATE_PARENT;
2994
2995                 if (update_flags)
2996                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
2997
2998 #if CONFIG_FSE
2999                 add_fsevent(FSE_CREATE_FILE, ctx,
3000                     FSE_ARG_VNODE, vp,
3001                     FSE_ARG_DONE);
3002 #endif
3003         }
3004
3005 out:
3006         /*
3007          * nameidone has to happen before we vnode_put(dvp)
3008          * since it may need to release the fs_nodelock on the dvp
3009          */
3010         nameidone(&nd);
3011
3012         if (vp)
3013                 vnode_put(vp);
3014         vnode_put(dvp);
3015
3016         return (error);
3017 }
3018
3019 /*
3020  * Create a named pipe.
3021  *
3022  * Returns:     0                       Success
3023  *              EEXIST
3024  *      namei:???
3025  *      vnode_authorize:???
3026  *      vn_create:???
3027  */
3028 static int
3029 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
3030 {
3031         vnode_t vp, dvp;
3032         int error;
3033         struct nameidata nd;
3034
3035         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
3036                 UIO_USERSPACE, upath, ctx);
3037         error = namei(&nd);
3038         if (error)
3039                 return (error);
3040         dvp = nd.ni_dvp;
3041         vp = nd.ni_vp;
3042
3043         /* check that this is a new file and authorize addition */
3044         if (vp != NULL) {
3045                 error = EEXIST;
3046                 goto out;
3047         }
3048         VATTR_SET(vap, va_type, VFIFO);
3049
3050 #if CONFIG_MACF
3051         error = mac_vnode_check_create(ctx, nd.ni_dvp,
3052             &nd.ni_cnd, vap);
3053         if (error)
3054                 goto out;
3055 #endif
3056
3057
3058         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3059                 goto out;
3060
3061
3062         error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx);
3063 out:
3064         /*
3065          * nameidone has to happen before we vnode_put(dvp)
3066          * since it may need to release the fs_nodelock on the dvp
3067          */
3068         nameidone(&nd);
3069
3070         if (vp)
3071                 vnode_put(vp);
3072         vnode_put(dvp);
3073
3074         return error;
3075 }
3076
3077
3078 /*
3079  * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3080  *
3081  * Parameters:  p                       Process requesting the open
3082  *              uap                     User argument descriptor (see below)
3083  *              retval                  (Ignored)
3084  *
3085  * Indirect:    uap->path               Path to fifo (same as 'mkfifo')
3086  *              uap->uid                UID to set
3087  *              uap->gid                GID to set
3088  *              uap->mode               File mode to set (same as 'mkfifo')
3089  *              uap->xsecurity          ACL to set, if creating
3090  *
3091  * Returns:     0                       Success
3092  *              !0                      errno value
3093  *
3094  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
3095  *
3096  * XXX:         We should enummerate the possible errno values here, and where
3097  *              in the code they originated.
3098  */
3099 int
3100 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3101 {
3102         int ciferror;
3103         kauth_filesec_t xsecdst;
3104         struct vnode_attr va;
3105
3106         AUDIT_ARG(owner, uap->uid, uap->gid);
3107
3108         xsecdst = KAUTH_FILESEC_NONE;
3109         if (uap->xsecurity != USER_ADDR_NULL) {
3110                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3111                         return ciferror;
3112         }
3113
3114         VATTR_INIT(&va);
3115         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3116         if (uap->uid != KAUTH_UID_NONE)
3117                 VATTR_SET(&va, va_uid, uap->uid);
3118         if (uap->gid != KAUTH_GID_NONE)
3119                 VATTR_SET(&va, va_gid, uap->gid);
3120         if (xsecdst != KAUTH_FILESEC_NONE)
3121                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3122
3123         ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3124
3125         if (xsecdst != KAUTH_FILESEC_NONE)
3126                 kauth_filesec_free(xsecdst);
3127         return ciferror;
3128 }
3129
3130 /* ARGSUSED */
3131 int
3132 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3133 {
3134         struct vnode_attr va;
3135
3136         VATTR_INIT(&va);
3137         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3138
3139         return(mkfifo1(vfs_context_current(), uap->path, &va));
3140 }
3141
3142
/*
 * my_strrchr
 *
 * Find the last occurrence of 'ch' in the NUL-terminated string 'p'.
 * The terminating NUL takes part in the search, so asking for '\0'
 * yields a pointer to the terminator.
 *
 * Returns:	Pointer to the last match, or NULL if there is none.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch)
			last = p;
	} while (*p++ != '\0');

	return (last);
}
3156
3157 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3158
3159 int
3160 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
3161 {
3162         int ret, len = _len;
3163
3164         *truncated_path = 0;
3165         ret = vn_getpath(dvp, path, &len);
3166         if (ret == 0 && len < (MAXPATHLEN - 1)) {
3167                 if (leafname) {
3168                         path[len-1] = '/';
3169                         len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
3170                         if (len > MAXPATHLEN) {
3171                                 char *ptr;
3172
3173                                 // the string got truncated!
3174                                 *truncated_path = 1;
3175                                 ptr = my_strrchr(path, '/');
3176                                 if (ptr) {
3177                                         *ptr = '\0';   // chop off the string at the last directory component
3178                                 }
3179                                 len = strlen(path) + 1;
3180                         }
3181                 }
3182         } else if (ret == 0) {
3183                 *truncated_path = 1;
3184         } else if (ret != 0) {
3185                 struct vnode *mydvp=dvp;
3186
3187                 if (ret != ENOSPC) {
3188                         printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
3189                                dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
3190                 }
3191                 *truncated_path = 1;
3192
3193                 do {
3194                         if (mydvp->v_parent != NULL) {
3195                                 mydvp = mydvp->v_parent;
3196                         } else if (mydvp->v_mount) {
3197                                 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
3198                                 break;
3199                         } else {
3200                                 // no parent and no mount point?  only thing is to punt and say "/" changed
3201                                 strlcpy(path, "/", _len);
3202                                 len = 2;
3203                                 mydvp = NULL;
3204                         }
3205
3206                         if (mydvp == NULL) {
3207                                 break;
3208                         }
3209
3210                         len = _len;
3211                         ret = vn_getpath(mydvp, path, &len);
3212                 } while (ret == ENOSPC);
3213         }
3214
3215         return len;
3216 }
3217
3218
3219 /*
3220  * Make a hard file link.
3221  *
3222  * Returns:     0                       Success
3223  *              EPERM
3224  *              EEXIST
3225  *              EXDEV
3226  *      namei:???
3227  *      vnode_authorize:???
3228  *      VNOP_LINK:???
3229  */
3230 /* ARGSUSED */
3231 int
3232 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
3233 {
3234         vnode_t vp, dvp, lvp;
3235         struct nameidata nd;
3236         vfs_context_t ctx = vfs_context_current();
3237         int error;
3238 #if CONFIG_FSE
3239         fse_info finfo;
3240 #endif
3241         int need_event, has_listeners;
3242         char *target_path = NULL;
3243         int truncated=0;
3244
3245         vp = dvp = lvp = NULLVP;
3246
3247         /* look up the object we are linking to */
3248         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
3249                 UIO_USERSPACE, uap->path, ctx);
3250         error = namei(&nd);
3251         if (error)
3252                 return (error);
3253         vp = nd.ni_vp;
3254
3255         nameidone(&nd);
3256
3257         /*
3258          * Normally, linking to directories is not supported.
3259          * However, some file systems may have limited support.
3260          */
3261         if (vp->v_type == VDIR) {
3262                 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
3263                         error = EPERM;   /* POSIX */
3264                         goto out;
3265                 }
3266                 /* Linking to a directory requires ownership. */
3267                 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
3268                         struct vnode_attr dva;
3269
3270                         VATTR_INIT(&dva);
3271                         VATTR_WANTED(&dva, va_uid);
3272                         if (vnode_getattr(vp, &dva, ctx) != 0 ||
3273                             !VATTR_IS_SUPPORTED(&dva, va_uid) ||
3274                             (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
3275                                 error = EACCES;
3276                                 goto out;
3277                         }
3278                 }
3279         }
3280
3281         /* lookup the target node */
3282         nd.ni_cnd.cn_nameiop = CREATE;
3283         nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
3284         nd.ni_dirp = uap->link;
3285         error = namei(&nd);
3286         if (error != 0)
3287                 goto out;
3288         dvp = nd.ni_dvp;
3289         lvp = nd.ni_vp;
3290
3291 #if CONFIG_MACF
3292         if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
3293                 goto out2;
3294 #endif
3295
3296         /* or to anything that kauth doesn't want us to (eg. immutable items) */
3297         if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
3298                 goto out2;
3299
3300         /* target node must not exist */
3301         if (lvp != NULLVP) {
3302                 error = EEXIST;
3303                 goto out2;
3304         }
3305         /* cannot link across mountpoints */
3306         if (vnode_mount(vp) != vnode_mount(dvp)) {
3307                 error = EXDEV;
3308                 goto out2;
3309         }
3310
3311         /* authorize creation of the target note */
3312         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3313                 goto out2;
3314
3315         /* and finally make the link */
3316         error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
3317         if (error)
3318                 goto out2;
3319
3320 #if CONFIG_FSE
3321         need_event = need_fsevent(FSE_CREATE_FILE, dvp);
3322 #else
3323         need_event = 0;
3324 #endif
3325         has_listeners = kauth_authorize_fileop_has_listeners();
3326
3327         if (need_event || has_listeners) {
3328                 char *link_to_path = NULL;
3329                 int len, link_name_len;
3330
3331                 /* build the path to the new link file */
3332                 GET_PATH(target_path);
3333                 if (target_path == NULL) {
3334                         error = ENOMEM;
3335                         goto out2;
3336                 }
3337
3338                 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
3339
3340                 if (has_listeners) {
3341                         /* build the path to file we are linking to */
3342                         GET_PATH(link_to_path);
3343                         if (link_to_path == NULL) {
3344                                 error = ENOMEM;
3345                                 goto out2;
3346                         }
3347
3348                         link_name_len = MAXPATHLEN;
3349                         vn_getpath(vp, link_to_path, &link_name_len);
3350
3351                         /*
3352                          * Call out to allow 3rd party notification of rename.
3353                          * Ignore result of kauth_authorize_fileop call.
3354                          */
3355                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
3356                                                (uintptr_t)link_to_path, (uintptr_t)target_path);
3357                         if (link_to_path != NULL) {
3358                                 RELEASE_PATH(link_to_path);
3359                         }
3360                 }
3361 #if CONFIG_FSE
3362                 if (need_event) {
3363                         /* construct fsevent */
3364                         if (get_fse_info(vp, &finfo, ctx) == 0) {
3365                                 if (truncated) {
3366                                         finfo.mode |= FSE_TRUNCATED_PATH;
3367                                 }
3368
3369                                 // build the path to the destination of the link
3370                                 add_fsevent(FSE_CREATE_FILE, ctx,
3371                                             FSE_ARG_STRING, len, target_path,
3372                                             FSE_ARG_FINFO, &finfo,
3373                                             FSE_ARG_DONE);
3374                         }
3375                         if (vp->v_parent) {
3376                             add_fsevent(FSE_STAT_CHANGED, ctx,
3377                                 FSE_ARG_VNODE, vp->v_parent,
3378                                 FSE_ARG_DONE);
3379                         }
3380                 }
3381 #endif
3382         }
3383 out2:
3384         /*
3385          * nameidone has to happen before we vnode_put(dvp)
3386          * since it may need to release the fs_nodelock on the dvp
3387          */
3388         nameidone(&nd);
3389         if (target_path != NULL) {
3390                 RELEASE_PATH(target_path);
3391         }
3392 out:
3393         if (lvp)
3394                 vnode_put(lvp);
3395         if (dvp)
3396                 vnode_put(dvp);
3397         vnode_put(vp);
3398         return (error);
3399 }
3400
3401 /*
3402  * Make a symbolic link.
3403  *
3404  * We could add support for ACLs here too...
3405  */
3406 /* ARGSUSED */
3407 int
3408 symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
3409 {
3410         struct vnode_attr va;
3411         char *path;
3412         int error;
3413         struct nameidata nd;
3414         vfs_context_t ctx = vfs_context_current();
3415         vnode_t vp, dvp;
3416         size_t dummy=0;
3417
3418         MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
3419         error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
3420         if (error)
3421                 goto out;
3422         AUDIT_ARG(text, path);  /* This is the link string */
3423
3424         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
3425                 UIO_USERSPACE, uap->link, ctx);
3426         error = namei(&nd);
3427         if (error)
3428                 goto out;
3429         dvp = nd.ni_dvp;
3430         vp = nd.ni_vp;
3431
3432         VATTR_INIT(&va);
3433         VATTR_SET(&va, va_type, VLNK);
3434         VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
3435 #if CONFIG_MACF
3436         error = mac_vnode_check_create(ctx,
3437                         dvp, &nd.ni_cnd, &va);
3438 #endif
3439         if (error != 0) {
3440             goto skipit;
3441         }
3442
3443         if (vp != NULL) {
3444             error = EEXIST;
3445             goto skipit;
3446         }
3447
3448         /* authorize */
3449         if (error == 0)
3450                 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
3451         /* get default ownership, etc. */
3452         if (error == 0)
3453                 error = vnode_authattr_new(dvp, &va, 0, ctx);
3454         if (error == 0)
3455                 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
3456
3457         /* do fallback attribute handling */
3458         if (error == 0)
3459                 error = vnode_setattr_fallback(vp, &va, ctx);
3460
3461         if (error == 0) {
3462                 int     update_flags = 0;
3463
3464                 if (vp == NULL) {
3465                         nd.ni_cnd.cn_nameiop = LOOKUP;
3466                         nd.ni_cnd.cn_flags = 0;
3467                         error = namei(&nd);
3468                         vp = nd.ni_vp;
3469
3470                         if (vp == NULL)
3471                                 goto skipit;
3472                 }
3473
3474 #if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
3475                 /* call out to allow 3rd party notification of rename.
3476                  * Ignore result of kauth_authorize_fileop call.
3477                  */
3478                 if (kauth_authorize_fileop_has_listeners() &&
3479                     namei(&nd) == 0) {
3480                         char *new_link_path = NULL;
3481                         int             len;
3482
3483                         /* build the path to the new link file */
3484                         new_link_path = get_pathbuff();
3485                         len = MAXPATHLEN;
3486                         vn_getpath(dvp, new_link_path, &len);
3487                         if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
3488                                 new_link_path[len - 1] = '/';
3489                                 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
3490                         }
3491
3492                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
3493                                            (uintptr_t)path, (uintptr_t)new_link_path);
3494                         if (new_link_path != NULL)
3495                                 release_pathbuff(new_link_path);
3496                 }
3497 #endif
3498                 // Make sure the name & parent pointers are hooked up
3499                 if (vp->v_name == NULL)
3500                         update_flags |= VNODE_UPDATE_NAME;
3501                 if (vp->v_parent == NULLVP)
3502                         update_flags |= VNODE_UPDATE_PARENT;
3503
3504                 if (update_flags)
3505                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3506
3507 #if CONFIG_FSE
3508                 add_fsevent(FSE_CREATE_FILE, ctx,
3509                             FSE_ARG_VNODE, vp,
3510                             FSE_ARG_DONE);
3511 #endif
3512         }
3513
3514 skipit:
3515         /*
3516          * nameidone has to happen before we vnode_put(dvp)
3517          * since it may need to release the fs_nodelock on the dvp
3518          */
3519         nameidone(&nd);
3520
3521         if (vp)
3522                 vnode_put(vp);
3523         vnode_put(dvp);
3524 out:
3525         FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
3526
3527         return (error);
3528 }
3529
3530 /*
3531  * Delete a whiteout from the filesystem.
3532  * XXX authorization not implmented for whiteouts
3533  */
3534 int
3535 undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
3536 {
3537         int error;
3538         struct nameidata nd;
3539         vfs_context_t ctx = vfs_context_current();
3540         vnode_t vp, dvp;
3541
3542         NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1,
3543                 UIO_USERSPACE, uap->path, ctx);
3544         error = namei(&nd);
3545         if (error)
3546                 return (error);
3547         dvp = nd.ni_dvp;
3548         vp = nd.ni_vp;
3549
3550         if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
3551                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
3552         } else
3553                 error = EEXIST;
3554
3555         /*
3556          * nameidone has to happen before we vnode_put(dvp)
3557          * since it may need to release the fs_nodelock on the dvp
3558          */
3559         nameidone(&nd);
3560
3561         if (vp)
3562                 vnode_put(vp);
3563         vnode_put(dvp);
3564
3565         return (error);
3566 }
3567
3568
3569 /*
3570  * Delete a name from the filesystem.
3571  */
3572 /* ARGSUSED */
3573 int
3574 unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
3575 {
3576         vnode_t vp, dvp;
3577         int error;
3578         struct componentname *cnp;
3579         char  *path = NULL;
3580         int  len=0;
3581 #if CONFIG_FSE
3582         fse_info  finfo;
3583 #endif
3584         int flags = 0;
3585         int need_event = 0;
3586         int has_listeners = 0;
3587         int truncated_path=0;
3588 #if NAMEDRSRCFORK
3589         /* unlink or delete is allowed on rsrc forks and named streams */
3590         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3591 #endif
3592
3593         ndp->ni_cnd.cn_flags |= LOCKPARENT;
3594         cnp = &ndp->ni_cnd;
3595
3596         error = namei(ndp);
3597         if (error)
3598                 return (error);
3599
3600         dvp = ndp->ni_dvp;
3601         vp = ndp->ni_vp;
3602
3603         /* With Carbon delete semantics, busy files cannot be deleted */
3604         if (nodelbusy) {
3605                 flags |= VNODE_REMOVE_NODELETEBUSY;
3606         }
3607
3608         /*
3609          * Normally, unlinking of directories is not supported.
3610          * However, some file systems may have limited support.
3611          */
3612         if ((vp->v_type == VDIR) &&
3613             !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
3614                 error = EPERM;  /* POSIX */
3615         }
3616
3617         /*
3618          * The root of a mounted filesystem cannot be deleted.
3619          */
3620         if (vp->v_flag & VROOT) {
3621                 error = EBUSY;
3622         }
3623         if (error)
3624                 goto out;
3625
3626
3627         /* authorize the delete operation */
3628 #if CONFIG_MACF
3629         if (!error)
3630                 error = mac_vnode_check_unlink(ctx,
3631                     dvp, vp, cnp);
3632 #endif /* MAC */
3633         if (!error)
3634                 error = vnode_authorize(vp, ndp->ni_dvp, KAUTH_VNODE_DELETE, ctx);
3635         if (error)
3636                 goto out;
3637
3638 #if CONFIG_FSE
3639         need_event = need_fsevent(FSE_DELETE, dvp);
3640         if (need_event) {
3641                 if ((vp->v_flag & VISHARDLINK) == 0) {
3642                         get_fse_info(vp, &finfo, ctx);
3643                 }
3644         }
3645 #endif
3646         has_listeners = kauth_authorize_fileop_has_listeners();
3647         if (need_event || has_listeners) {
3648                 GET_PATH(path);
3649                 if (path == NULL) {
3650                         error = ENOMEM;
3651                         goto out;
3652                 }
3653
3654                 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
3655         }
3656
3657 #if NAMEDRSRCFORK
3658         if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
3659                 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
3660         else
3661 #endif
3662                 error = VNOP_REMOVE(dvp, vp, &ndp->ni_cnd, flags, ctx);
3663
3664         /*
3665          * Call out to allow 3rd party notification of delete.
3666          * Ignore result of kauth_authorize_fileop call.
3667          */
3668         if (!error) {
3669                 if (has_listeners) {
3670                         kauth_authorize_fileop(vfs_context_ucred(ctx),
3671                                 KAUTH_FILEOP_DELETE,
3672                                 (uintptr_t)vp,
3673                                 (uintptr_t)path);
3674                 }
3675
3676                 if (vp->v_flag & VISHARDLINK) {
3677                     //
3678                     // if a hardlink gets deleted we want to blow away the
3679                     // v_parent link because the path that got us to this
3680                     // instance of the link is no longer valid.  this will
3681                     // force the next call to get the path to ask the file
3682                     // system instead of just following the v_parent link.
3683                     //
3684                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
3685                 }
3686
3687 #if CONFIG_FSE
3688                 if (need_event) {
3689                         if (vp->v_flag & VISHARDLINK) {
3690                                 get_fse_info(vp, &finfo, ctx);
3691                         }
3692                         if (truncated_path) {
3693                                 finfo.mode |= FSE_TRUNCATED_PATH;
3694                         }
3695                         add_fsevent(FSE_DELETE, ctx,
3696                                                 FSE_ARG_STRING, len, path,
3697                                                 FSE_ARG_FINFO, &finfo,
3698                                                 FSE_ARG_DONE);
3699                 }
3700 #endif
3701         }
3702         if (path != NULL)
3703                 RELEASE_PATH(path);
3704
3705         /*
3706          * nameidone has to happen before we vnode_put(dvp)
3707          * since it may need to release the fs_nodelock on the dvp
3708          */
3709 out:
3710 #if NAMEDRSRCFORK
3711         /* recycle the deleted rsrc fork vnode to force a reclaim, which
3712          * will cause its shadow file to go away if necessary.
3713          */
3714          if ((vnode_isnamedstream(ndp->ni_vp)) &&
3715                 (ndp->ni_vp->v_parent != NULLVP) &&
3716                 vnode_isshadow(ndp->ni_vp)) {
3717                         vnode_recycle(ndp->ni_vp);
3718          }
3719 #endif
3720         nameidone(ndp);
3721         vnode_put(dvp);
3722         vnode_put(vp);
3723         return (error);
3724 }
3725
3726 /*
3727  * Delete a name from the filesystem using POSIX semantics.
3728  */
3729 int
3730 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
3731 {
3732         struct nameidata nd;
3733         vfs_context_t ctx = vfs_context_current();
3734
3735         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3736         return unlink1(ctx, &nd, 0);
3737 }
3738
3739 /*
3740  * Delete a name from the filesystem using Carbon semantics.
3741  */
3742 int
3743 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
3744 {
3745         struct nameidata nd;
3746         vfs_context_t ctx = vfs_context_current();
3747
3748         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3749         return unlink1(ctx, &nd, 1);
3750 }
3751
3752 /*
3753  * Reposition read/write file offset.
3754  */
3755 int
3756 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
3757 {
3758         struct fileproc *fp;
3759         vnode_t vp;
3760         struct vfs_context *ctx;
3761         off_t offset = uap->offset, file_size;
3762         int error;
3763
3764         if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
3765                 if (error == ENOTSUP)
3766                         return (ESPIPE);
3767                 return (error);
3768         }
3769         if (vnode_isfifo(vp)) {
3770                 file_drop(uap->fd);
3771                 return(ESPIPE);
3772         }
3773
3774
3775         ctx = vfs_context_current();
3776 #if CONFIG_MACF
3777         if (uap->whence == L_INCR && uap->offset == 0)
3778                 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
3779                     fp->f_fglob);
3780         else
3781                 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
3782                     fp->f_fglob);
3783         if (error) {
3784                 file_drop(uap->fd);
3785                 return (error);
3786         }
3787 #endif
3788         if ( (error = vnode_getwithref(vp)) ) {
3789                 file_drop(uap->fd);
3790                 return(error);
3791         }
3792
3793         switch (uap->whence) {
3794         case L_INCR:
3795                 offset += fp->f_fglob->fg_offset;
3796                 break;
3797         case L_XTND:
3798                 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
3799                         break;
3800                 offset += file_size;
3801                 break;
3802         case L_SET:
3803                 break;
3804         default:
3805                 error = EINVAL;
3806         }
3807         if (error == 0) {
3808                 if (uap->offset > 0 && offset < 0) {
3809                         /* Incremented/relative move past max size */
3810                         error = EOVERFLOW;
3811                 } else {
3812                         /*
3813                          * Allow negative offsets on character devices, per
3814                          * POSIX 1003.1-2001.  Most likely for writing disk
3815                          * labels.
3816                          */
3817                         if (offset < 0 && vp->v_type != VCHR) {
3818                                 /* Decremented/relative move before start */
3819                                 error = EINVAL;
3820                         } else {
3821                                 /* Success */
3822                                 fp->f_fglob->fg_offset = offset;
3823                                 *retval = fp->f_fglob->fg_offset;
3824                         }
3825                 }
3826         }
3827
3828         /*
3829          * An lseek can affect whether data is "available to read."  Use
3830          * hint of NOTE_NONE so no EVFILT_VNODE events fire
3831          */
3832         post_event_if_success(vp, error, NOTE_NONE);
3833         (void)vnode_put(vp);
3834         file_drop(uap->fd);
3835         return (error);
3836 }
3837
3838
3839 /*
3840  * Check access permissions.
3841  *
3842  * Returns:     0                       Success
3843  *              vnode_authorize:???
3844  */
3845 static int
3846 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
3847 {
3848         kauth_action_t action;
3849         int error;
3850
3851         /*
3852          * If just the regular access bits, convert them to something
3853          * that vnode_authorize will understand.
3854          */
3855         if (!(uflags & _ACCESS_EXTENDED_MASK)) {
3856                 action = 0;
3857                 if (uflags & R_OK)
3858                         action |= KAUTH_VNODE_READ_DATA;        /* aka KAUTH_VNODE_LIST_DIRECTORY */
3859                 if (uflags & W_OK) {
3860                         if (vnode_isdir(vp)) {
3861                                 action |= KAUTH_VNODE_ADD_FILE |
3862                                     KAUTH_VNODE_ADD_SUBDIRECTORY;
3863                                 /* might want delete rights here too */
3864                         } else {
3865                                 action |= KAUTH_VNODE_WRITE_DATA;
3866                         }
3867                 }
3868                 if (uflags & X_OK) {
3869                         if (vnode_isdir(vp)) {
3870                                 action |= KAUTH_VNODE_SEARCH;
3871                         } else {
3872                                 action |= KAUTH_VNODE_EXECUTE;
3873                         }
3874                 }
3875         } else {
3876                 /* take advantage of definition of uflags */
3877                 action = uflags >> 8;
3878         }
3879
3880 #if CONFIG_MACF
3881         error = mac_vnode_check_access(ctx, vp, uflags);
3882         if (error)
3883                 return (error);
3884 #endif /* MAC */
3885
3886         /* action == 0 means only check for existence */
3887         if (action != 0) {
3888                 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
3889         } else {
3890                 error = 0;
3891         }
3892
3893         return(error);
3894 }
3895
3896
3897
3898 /*
3899  * access_extended: Check access permissions in bulk.
3900  *
3901  * Description: uap->entries            Pointer to an array of accessx
3902  *                                      descriptor structs, plus one or
3903  *                                      more NULL terminated strings (see
3904  *                                      "Notes" section below).
3905  *              uap->size               Size of the area pointed to by
3906  *                                      uap->entries.
3907  *              uap->results            Pointer to the results array.
3908  *
3909  * Returns:     0                       Success
3910  *              ENOMEM                  Insufficient memory
3911  *              EINVAL                  Invalid arguments
3912  *              namei:EFAULT            Bad address
3913  *              namei:ENAMETOOLONG      Filename too long
3914  *              namei:ENOENT            No such file or directory
3915  *              namei:ELOOP             Too many levels of symbolic links
3916  *              namei:EBADF             Bad file descriptor
3917  *              namei:ENOTDIR           Not a directory
3918  *              namei:???
3919  *              access1:
3920  *
3921  * Implicit returns:
3922  *              uap->results            Array contents modified
3923  *
3924  * Notes:       The uap->entries are structured as an arbitrary length array
3925  *              of accessx descriptors, followed by one or more NULL terminated
3926  *              strings
3927  *
3928  *                      struct accessx_descriptor[0]
3929  *                      ...
3930  *                      struct accessx_descriptor[n]
3931  *                      char name_data[0];
3932  *
3933  *              We determine the entry count by walking the buffer containing
3934  *              the uap->entries argument descriptor.  For each descriptor we
3935  *              see, the valid values for the offset ad_name_offset will be
3936  *              in the byte range:
3937  *
3938  *                      [ uap->entries + sizeof(struct accessx_descriptor) ]
3939  *                                              to
3940  *                              [ uap->entries + uap->size - 2 ]
3941  *
3942  *              since we must have at least one string, and the string must
3943  *              be at least one character plus the NULL terminator in length.
3944  *
3945  * XXX:         Need to support the check-as uid argument
3946  */
3947 int
3948 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
3949 {
3950         struct accessx_descriptor *input = NULL;
3951         errno_t *result = NULL;
3952         errno_t error = 0;
3953         int wantdelete = 0;
3954         unsigned int desc_max, desc_actual, i, j;
3955         struct vfs_context context;
3956         struct nameidata nd;
3957         int niopts;
3958         vnode_t vp = NULL;
3959         vnode_t dvp = NULL;
3960 #define ACCESSX_MAX_DESCR_ON_STACK 10
3961         struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
3962
3963         context.vc_ucred = NULL;
3964
3965         /*
3966          * Validate parameters; if valid, copy the descriptor array and string
3967          * arguments into local memory.  Before proceeding, the following
3968          * conditions must have been met:
3969          *
3970          * o    The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
3971          * o    There must be sufficient room in the request for at least one
3972          *      descriptor and a one yte NUL terminated string.
3973          * o    The allocation of local storage must not fail.
3974          */
3975         if (uap->size > ACCESSX_MAX_TABLESIZE)
3976                 return(ENOMEM);
3977         if (uap->size < (sizeof(struct accessx_descriptor) + 2))
3978                 return(EINVAL);
3979         if (uap->size <= sizeof (stack_input)) {
3980                 input = stack_input;
3981         } else {
3982         MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
3983         if (input == NULL) {
3984                 error = ENOMEM;
3985                 goto out;
3986         }
3987         }
3988         error = copyin(uap->entries, input, uap->size);
3989         if (error)
3990                 goto out;
3991
3992         AUDIT_ARG(opaque, input, uap->size);
3993
3994         /*
3995          * Force NUL termination of the copyin buffer to avoid nami() running
3996          * off the end.  If the caller passes us bogus data, they may get a
3997          * bogus result.
3998          */
3999         ((char *)input)[uap->size - 1] = 0;
4000
4001         /*
4002          * Access is defined as checking against the process' real identity,
4003          * even if operations are checking the effective identity.  This
4004          * requires that we use a local vfs context.
4005          */
4006         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4007         context.vc_thread = current_thread();
4008
4009         /*
4010          * Find out how many entries we have, so we can allocate the result
4011          * array by walking the list and adjusting the count downward by the
4012          * earliest string offset we see.
4013          */
4014         desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
4015         desc_actual = desc_max;
4016         for (i = 0; i < desc_actual; i++) {
4017                 /*
4018                  * Take the offset to the name string for this entry and
4019                  * convert to an input array index, which would be one off
4020                  * the end of the array if this entry was the lowest-addressed
4021                  * name string.
4022                  */
4023                 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
4024
4025                 /*
4026                  * An offset greater than the max allowable offset is an error.
4027                  * It is also an error for any valid entry to point
4028                  * to a location prior to the end of the current entry, if
4029                  * it's not a reference to the string of the previous entry.
4030                  */
4031                 if (j > desc_max || (j != 0 && j <= i)) {
4032                         error = EINVAL;
4033                         goto out;
4034                 }
4035
4036                 /*
4037                  * An offset of 0 means use the previous descriptor's offset;
4038                  * this is used to chain multiple requests for the same file
4039                  * to avoid multiple lookups.
4040                  */
4041                 if (j == 0) {
4042                         /* This is not valid for the first entry */
4043                         if (i == 0) {
4044                                 error = EINVAL;
4045                                 goto out;
4046                         }
4047                         continue;
4048                 }
4049
4050                 /*
4051                  * If the offset of the string for this descriptor is before
4052                  * what we believe is the current actual last descriptor,
4053                  * then we need to adjust our estimate downward; this permits
4054                  * the string table following the last descriptor to be out
4055                  * of order relative to the descriptor list.
4056                  */
4057                 if (j < desc_actual)
4058                         desc_actual = j;
4059         }
4060
4061         /*
4062          * We limit the actual number of descriptors we are willing to process
4063          * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
4064          * requested does not exceed this limit,
4065          */
4066         if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
4067                 error = ENOMEM;
4068                 goto out;
4069         }
4070         MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
4071         if (result == NULL) {
4072                 error = ENOMEM;
4073                 goto out;
4074         }
4075
4076         /*
4077          * Do the work by iterating over the descriptor entries we know to
4078          * at least appear to contain valid data.
4079          */
4080         error = 0;
4081         for (i = 0; i < desc_actual; i++) {
4082                 /*
4083                  * If the ad_name_offset is 0, then we use the previous
4084                  * results to make the check; otherwise, we are looking up
4085                  * a new file name.
4086                  */
4087                 if (input[i].ad_name_offset != 0) {
4088                         /* discard old vnodes */
4089                         if (vp) {
4090                                 vnode_put(vp);
4091                                 vp = NULL;
4092                         }
4093                         if (dvp) {
4094                                 vnode_put(dvp);
4095                                 dvp = NULL;
4096                         }
4097
4098                         /*
4099                          * Scan forward in the descriptor list to see if we
4100                          * need the parent vnode.  We will need it if we are
4101                          * deleting, since we must have rights  to remove
4102                          * entries in the parent directory, as well as the
4103                          * rights to delete the object itself.
4104                          */
4105                         wantdelete = input[i].ad_flags & _DELETE_OK;
4106                         for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
4107                                 if (input[j].ad_flags & _DELETE_OK)
4108                                         wantdelete = 1;
4109
4110                         niopts = FOLLOW | AUDITVNPATH1;
4111
4112                         /* need parent for vnode_authorize for deletion test */
4113                         if (wantdelete)
4114                                 niopts |= WANTPARENT;
4115
4116                         /* do the lookup */
4117                         NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset), &context);
4118                         error = namei(&nd);
4119                         if (!error) {
4120                                 vp = nd.ni_vp;
4121                                 if (wantdelete)
4122                                         dvp = nd.ni_dvp;
4123                         }
4124                         nameidone(&nd);
4125                 }
4126
4127                 /*
4128                  * Handle lookup errors.
4129                  */
4130                 switch(error) {
4131                 case ENOENT:
4132                 case EACCES:
4133                 case EPERM:
4134                 case ENOTDIR:
4135                         result[i] = error;
4136                         break;
4137                 case 0:
4138                         /* run this access check */
4139                         result[i] = access1(vp, dvp, input[i].ad_flags, &context);
4140                         break;
4141                 default:
4142                         /* fatal lookup error */
4143
4144                         goto out;
4145                 }
4146         }
4147
4148         AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
4149
4150         /* copy out results */
4151         error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
4152
4153 out:
4154         if (input && input != stack_input)
4155                 FREE(input, M_TEMP);
4156         if (result)
4157                 FREE(result, M_TEMP);
4158         if (vp)
4159                 vnode_put(vp);
4160         if (dvp)
4161                 vnode_put(dvp);
4162         if (IS_VALID_CRED(context.vc_ucred))
4163                 kauth_cred_unref(&context.vc_ucred);
4164         return(error);
4165 }
4166
4167
4168 /*
4169  * Returns:     0                       Success
4170  *              namei:EFAULT            Bad address
4171  *              namei:ENAMETOOLONG      Filename too long
4172  *              namei:ENOENT            No such file or directory
4173  *              namei:ELOOP             Too many levels of symbolic links
4174  *              namei:EBADF             Bad file descriptor
4175  *              namei:ENOTDIR           Not a directory
4176  *              namei:???
4177  *              access1:
4178  */
4179 int
4180 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
4181 {
4182         int error;
4183         struct nameidata nd;
4184         int niopts;
4185         struct vfs_context context;
4186 #if NAMEDRSRCFORK
4187         int is_namedstream = 0;
4188 #endif
4189
4190         /*
4191          * Access is defined as checking against the process'
4192          * real identity, even if operations are checking the
4193          * effective identity.  So we need to tweak the credential
4194          * in the context.
4195          */
4196         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4197         context.vc_thread = current_thread();
4198
4199         niopts = FOLLOW | AUDITVNPATH1;
4200         /* need parent for vnode_authorize for deletion test */
4201         if (uap->flags & _DELETE_OK)
4202                 niopts |= WANTPARENT;
4203         NDINIT(&nd, LOOKUP, niopts, UIO_USERSPACE, uap->path, &context);
4204
4205 #if NAMEDRSRCFORK
4206         /* access(F_OK) calls are allowed for resource forks. */
4207         if (uap->flags == F_OK)
4208                 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4209 #endif
4210         error = namei(&nd);
4211         if (error)
4212                 goto out;
4213
4214 #if NAMEDRSRCFORK
4215         /* Grab reference on the shadow stream file vnode to
4216          * force an inactive on release which will mark it
4217          * for recycle.
4218          */
4219         if (vnode_isnamedstream(nd.ni_vp) &&
4220             (nd.ni_vp->v_parent != NULLVP) &&
4221             vnode_isshadow(nd.ni_vp)) {
4222                 is_namedstream = 1;
4223                 vnode_ref(nd.ni_vp);
4224         }
4225 #endif
4226
4227         error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
4228
4229 #if NAMEDRSRCFORK
4230         if (is_namedstream) {
4231                 vnode_rele(nd.ni_vp);
4232         }
4233 #endif
4234
4235         vnode_put(nd.ni_vp);
4236         if (uap->flags & _DELETE_OK)
4237                 vnode_put(nd.ni_dvp);
4238         nameidone(&nd);
4239
4240 out:
4241         kauth_cred_unref(&context.vc_ucred);
4242         return(error);
4243 }
4244
4245
4246 /*
4247  * Returns:     0                       Success
4248  *              EFAULT
4249  *      copyout:EFAULT
4250  *      namei:???
4251  *      vn_stat:???
4252  */
4253 static int
4254 stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4255 {
4256         union {
4257                 struct stat sb;
4258                 struct stat64 sb64;
4259         } source;
4260         union {
4261                 struct user64_stat user64_sb;
4262                 struct user32_stat user32_sb;
4263                 struct user64_stat64 user64_sb64;
4264                 struct user32_stat64 user32_sb64;
4265         } dest;
4266         caddr_t sbp;
4267         int error, my_size;
4268         kauth_filesec_t fsec;
4269         size_t xsecurity_bufsize;
4270         void * statptr;
4271
4272 #if NAMEDRSRCFORK
4273         int is_namedstream = 0;
4274         /* stat calls are allowed for resource forks. */
4275         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4276 #endif
4277         error = namei(ndp);
4278         if (error)
4279                 return (error);
4280         fsec = KAUTH_FILESEC_NONE;
4281
4282         statptr = (void *)&source;
4283
4284 #if NAMEDRSRCFORK
4285         /* Grab reference on the shadow stream file vnode to
4286          * force an inactive on release which will mark it
4287          * for recycle.
4288          */
4289         if (vnode_isnamedstream(ndp->ni_vp) &&
4290             (ndp->ni_vp->v_parent != NULLVP) &&
4291             vnode_isshadow(ndp->ni_vp)) {
4292                 is_namedstream = 1;
4293                 vnode_ref(ndp->ni_vp);
4294         }
4295 #endif
4296
4297         error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
4298
4299 #if NAMEDRSRCFORK
4300         if (is_namedstream) {
4301                 vnode_rele(ndp->ni_vp);
4302         }
4303 #endif
4304         vnode_put(ndp->ni_vp);
4305         nameidone(ndp);
4306
4307         if (error)
4308                 return (error);
4309         /* Zap spare fields */
4310         if (isstat64 != 0) {
4311                 source.sb64.st_lspare = 0;
4312                 source.sb64.st_qspare[0] = 0LL;
4313                 source.sb64.st_qspare[1] = 0LL;
4314                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4315                         munge_user64_stat64(&source.sb64, &dest.user64_sb64);
4316                         my_size = sizeof(dest.user64_sb64);
4317                         sbp = (caddr_t)&dest.user64_sb64;
4318                 } else {
4319                         munge_user32_stat64(&source.sb64, &dest.user32_sb64);
4320                         my_size = sizeof(dest.user32_sb64);
4321                         sbp = (caddr_t)&dest.user32_sb64;
4322                 }
4323                 /*
4324                  * Check if we raced (post lookup) against the last unlink of a file.
4325                  */
4326                 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
4327                         source.sb64.st_nlink = 1;
4328                 }
4329         } else {
4330                 source.sb.st_lspare = 0;
4331                 source.sb.st_qspare[0] = 0LL;
4332                 source.sb.st_qspare[1] = 0LL;
4333                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4334                         munge_user64_stat(&source.sb, &dest.user64_sb);
4335                         my_size = sizeof(dest.user64_sb);
4336                         sbp = (caddr_t)&dest.user64_sb;
4337                 } else {
4338                         munge_user32_stat(&source.sb, &dest.user32_sb);
4339                         my_size = sizeof(dest.user32_sb);
4340                         sbp = (caddr_t)&dest.user32_sb;
4341                 }
4342
4343                 /*
4344                  * Check if we raced (post lookup) against the last unlink of a file.
4345                  */
4346                 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
4347                         source.sb.st_nlink = 1;
4348                 }
4349         }
4350         if ((error = copyout(sbp, ub, my_size)) != 0)
4351                 goto out;
4352
4353         /* caller wants extended security information? */
4354         if (xsecurity != USER_ADDR_NULL) {
4355
4356                 /* did we get any? */
4357                 if (fsec == KAUTH_FILESEC_NONE) {
4358                         if (susize(xsecurity_size, 0) != 0) {
4359                                 error = EFAULT;
4360                                 goto out;
4361                         }
4362                 } else {
4363                         /* find the user buffer size */
4364                         xsecurity_bufsize = fusize(xsecurity_size);
4365
4366                         /* copy out the actual data size */
4367                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
4368                                 error = EFAULT;
4369                                 goto out;
4370                         }
4371
4372                         /* if the caller supplied enough room, copy out to it */
4373                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
4374                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
4375                 }
4376         }
4377 out:
4378         if (fsec != KAUTH_FILESEC_NONE)
4379                 kauth_filesec_free(fsec);
4380         return (error);
4381 }
4382
4383 /*
4384  * Get file status; this version follows links.
4385  *
4386  * Returns:     0                       Success
4387  *      stat2:???                       [see stat2() in this file]
4388  */
4389 static int
4390 stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4391 {
4392         struct nameidata nd;
4393         vfs_context_t ctx = vfs_context_current();
4394
4395         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
4396             UIO_USERSPACE, path, ctx);
4397         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4398 }
4399
4400 /*
4401  * stat_extended: Get file status; with extended security (ACL).
4402  *
4403  * Parameters:    p                       (ignored)
4404  *                uap                     User argument descriptor (see below)
4405  *                retval                  (ignored)
4406  *
4407  * Indirect:      uap->path               Path of file to get status from
4408  *                uap->ub                 User buffer (holds file status info)
4409  *                uap->xsecurity          ACL to get (extended security)
4410  *                uap->xsecurity_size     Size of ACL
4411  *
4412  * Returns:        0                      Success
4413  *                !0                      errno value
4414  *
4415  */
4416 int
4417 stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval)
4418 {
4419         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4420 }
4421
4422 /*
4423  * Returns:     0                       Success
4424  *      stat1:???                       [see stat1() in this file]
4425  */
4426 int
4427 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
4428 {
4429         return(stat1(uap->path, uap->ub, 0, 0, 0));
4430 }
4431
4432 int
4433 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
4434 {
4435         return(stat1(uap->path, uap->ub, 0, 0, 1));
4436 }
4437
4438 /*
4439  * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
4440  *
4441  * Parameters:    p                       (ignored)
4442  *                uap                     User argument descriptor (see below)
4443  *                retval                  (ignored)
4444  *
4445  * Indirect:      uap->path               Path of file to get status from
4446  *                uap->ub                 User buffer (holds file status info)
4447  *                uap->xsecurity          ACL to get (extended security)
4448  *                uap->xsecurity_size     Size of ACL
4449  *
4450  * Returns:        0                      Success
4451  *                !0                      errno value
4452  *
4453  */
4454 int
4455 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
4456 {
4457         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4458 }
4459 /*
4460  * Get file status; this version does not follow links.
4461  */
4462 static int
4463 lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4464 {
4465         struct nameidata nd;
4466         vfs_context_t ctx = vfs_context_current();
4467
4468         NDINIT(&nd, LOOKUP, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
4469             UIO_USERSPACE, path, ctx);
4470
4471         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4472 }
4473
4474 /*
4475  * lstat_extended: Get file status; does not follow links; with extended security (ACL).
4476  *
4477  * Parameters:    p                       (ignored)
4478  *                uap                     User argument descriptor (see below)
4479  *                retval                  (ignored)
4480  *
4481  * Indirect:      uap->path               Path of file to get status from
4482  *                uap->ub                 User buffer (holds file status info)
4483  *                uap->xsecurity          ACL to get (extended security)
4484  *                uap->xsecurity_size     Size of ACL
4485  *
4486  * Returns:        0                      Success
4487  *                !0                      errno value
4488  *
4489  */
4490 int
4491 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
4492 {
4493         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4494 }
4495
4496 int
4497 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
4498 {
4499         return(lstat1(uap->path, uap->ub, 0, 0, 0));
4500 }
4501
4502 int
4503 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
4504 {
4505         return(lstat1(uap->path, uap->ub, 0, 0, 1));
4506 }
4507
4508 /*
4509  * lstat64_extended: Get file status; can handle large inode numbers; does not
4510  * follow links; with extended security (ACL).
4511  *
4512  * Parameters:    p                       (ignored)
4513  *                uap                     User argument descriptor (see below)
4514  *                retval                  (ignored)
4515  *
4516  * Indirect:      uap->path               Path of file to get status from
4517  *                uap->ub                 User buffer (holds file status info)
4518  *                uap->xsecurity          ACL to get (extended security)
4519  *                uap->xsecurity_size     Size of ACL
4520  *
4521  * Returns:        0                      Success
4522  *                !0                      errno value
4523  *
4524  */
4525 int
4526 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
4527 {
4528         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4529 }
4530
4531 /*
4532  * Get configurable pathname variables.
4533  *
4534  * Returns:     0                       Success
4535  *      namei:???
4536  *      vn_pathconf:???
4537  *
4538  * Notes:       Global implementation  constants are intended to be
4539  *              implemented in this function directly; all other constants
4540  *              are per-FS implementation, and therefore must be handled in
4541  *              each respective FS, instead.
4542  *
4543  * XXX We implement some things globally right now that should actually be
4544  * XXX per-FS; we will need to deal with this at some point.
4545  */
4546 /* ARGSUSED */
4547 int
4548 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
4549 {
4550         int error;
4551         struct nameidata nd;
4552         vfs_context_t ctx = vfs_context_current();
4553
4554         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4555                 UIO_USERSPACE, uap->path, ctx);
4556         error = namei(&nd);
4557         if (error)
4558                 return (error);
4559
4560         error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
4561
4562         vnode_put(nd.ni_vp);
4563         nameidone(&nd);
4564         return (error);
4565 }
4566
4567 /*
4568  * Return target name of a symbolic link.
4569  */
4570 /* ARGSUSED */
4571 int
4572 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
4573 {
4574         vnode_t vp;
4575         uio_t auio;
4576         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
4577         int error;
4578         struct nameidata nd;
4579         vfs_context_t ctx = vfs_context_current();
4580         char uio_buf[ UIO_SIZEOF(1) ];
4581
4582         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1,
4583                 UIO_USERSPACE, uap->path, ctx);
4584         error = namei(&nd);
4585         if (error)
4586                 return (error);
4587         vp = nd.ni_vp;
4588
4589         nameidone(&nd);
4590
4591         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
4592                                                                   &uio_buf[0], sizeof(uio_buf));
4593         uio_addiov(auio, uap->buf, uap->count);
4594         if (vp->v_type != VLNK)
4595                 error = EINVAL;
4596         else {
4597 #if CONFIG_MACF
4598                 error = mac_vnode_check_readlink(ctx,
4599                     vp);
4600 #endif
4601                 if (error == 0)
4602                         error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
4603                 if (error == 0)
4604                         error = VNOP_READLINK(vp, auio, ctx);
4605         }
4606         vnode_put(vp);
4607
4608         /* Safe: uio_resid() is bounded above by "count", and "count" is an int  */
4609         *retval = uap->count - (int)uio_resid(auio);
4610         return (error);
4611 }
4612
4613 /*
4614  * Change file flags.
4615  */
4616 static int
4617 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
4618 {
4619         struct vnode_attr va;
4620         kauth_action_t action;
4621         int error;
4622
4623         VATTR_INIT(&va);
4624         VATTR_SET(&va, va_flags, flags);
4625
4626 #if CONFIG_MACF
4627         error = mac_vnode_check_setflags(ctx, vp, flags);
4628         if (error)
4629                 goto out;
4630 #endif
4631
4632         /* request authorisation, disregard immutability */
4633         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4634                 goto out;
4635         /*
4636          * Request that the auth layer disregard those file flags it's allowed to when
4637          * authorizing this operation; we need to do this in order to be able to
4638          * clear immutable flags.
4639          */
4640         if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
4641                 goto out;
4642         error = vnode_setattr(vp, &va, ctx);
4643
4644         if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
4645                 error = ENOTSUP;
4646         }
4647 out:
4648         vnode_put(vp);
4649         return(error);
4650 }
4651
4652 /*
4653  * Change flags of a file given a path name.
4654  */
4655 /* ARGSUSED */
4656 int
4657 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
4658 {
4659         vnode_t vp;
4660         vfs_context_t ctx = vfs_context_current();
4661         int error;
4662         struct nameidata nd;
4663
4664         AUDIT_ARG(fflags, uap->flags);
4665         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4666                 UIO_USERSPACE, uap->path, ctx);
4667         error = namei(&nd);
4668         if (error)
4669                 return (error);
4670         vp = nd.ni_vp;
4671         nameidone(&nd);
4672
4673         error = chflags1(vp, uap->flags, ctx);
4674
4675         return(error);
4676 }
4677
4678 /*
4679  * Change flags of a file given a file descriptor.
4680  */
4681 /* ARGSUSED */
4682 int
4683 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
4684 {
4685         vnode_t vp;
4686         int error;
4687
4688         AUDIT_ARG(fd, uap->fd);
4689         AUDIT_ARG(fflags, uap->flags);
4690         if ( (error = file_vnode(uap->fd, &vp)) )
4691                 return (error);
4692
4693         if ((error = vnode_getwithref(vp))) {
4694                 file_drop(uap->fd);
4695                 return(error);
4696         }
4697
4698         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4699
4700         error = chflags1(vp, uap->flags, vfs_context_current());
4701
4702         file_drop(uap->fd);
4703         return (error);
4704 }
4705
4706 /*
4707  * Change security information on a filesystem object.
4708  *
4709  * Returns:     0                       Success
4710  *              EPERM                   Operation not permitted
4711  *              vnode_authattr:???      [anything vnode_authattr can return]
4712  *              vnode_authorize:???     [anything vnode_authorize can return]
4713  *              vnode_setattr:???       [anything vnode_setattr can return]
4714  *
4715  * Notes:       If vnode_authattr or vnode_authorize return EACCES, it will be
4716  *              translated to EPERM before being returned.
4717  */
4718 static int
4719 chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
4720 {
4721         kauth_action_t action;
4722         int error;
4723
4724         AUDIT_ARG(mode, vap->va_mode);
4725         /* XXX audit new args */
4726
4727 #if NAMEDSTREAMS
4728         /* chmod calls are not allowed for resource forks. */
4729         if (vp->v_flag & VISNAMEDSTREAM) {
4730                 return (EPERM);
4731         }
4732 #endif
4733
4734 #if CONFIG_MACF
4735         error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode);
4736         if (error)
4737                 return (error);
4738 #endif
4739
4740         /* make sure that the caller is allowed to set this security information */
4741         if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
4742             ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4743                 if (error == EACCES)
4744                         error = EPERM;
4745                 return(error);
4746         }
4747
4748         error = vnode_setattr(vp, vap, ctx);
4749
4750         return (error);
4751 }
4752
4753
4754 /*
4755  * Change mode of a file given a path name.
4756  *
4757  * Returns:     0                       Success
4758  *              namei:???               [anything namei can return]
4759  *              chmod2:???              [anything chmod2 can return]
4760  */
4761 static int
4762 chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
4763 {
4764         struct nameidata nd;
4765         int error;
4766
4767         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4768                 UIO_USERSPACE, path, ctx);
4769         if ((error = namei(&nd)))
4770                 return (error);
4771         error = chmod2(ctx, nd.ni_vp, vap);
4772         vnode_put(nd.ni_vp);
4773         nameidone(&nd);
4774         return(error);
4775 }
4776
4777 /*
4778  * chmod_extended: Change the mode of a file given a path name; with extended
4779  * argument list (including extended security (ACL)).
4780  *
4781  * Parameters:  p                       Process requesting the open
4782  *              uap                     User argument descriptor (see below)
4783  *              retval                  (ignored)
4784  *
4785  * Indirect:    uap->path               Path to object (same as 'chmod')
4786  *              uap->uid                UID to set
4787  *              uap->gid                GID to set
4788  *              uap->mode               File mode to set (same as 'chmod')
4789  *              uap->xsecurity          ACL to set (or delete)
4790  *
4791  * Returns:     0                       Success
4792  *              !0                      errno value
4793  *
4794  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
4795  *
4796  * XXX:         We should enummerate the possible errno values here, and where
4797  *              in the code they originated.
4798  */
4799 int
4800 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
4801 {
4802         int error;
4803         struct vnode_attr va;
4804         kauth_filesec_t xsecdst;
4805
4806         AUDIT_ARG(owner, uap->uid, uap->gid);
4807
4808         VATTR_INIT(&va);
4809         if (uap->mode != -1)
4810                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4811         if (uap->uid != KAUTH_UID_NONE)
4812                 VATTR_SET(&va, va_uid, uap->uid);
4813         if (uap->gid != KAUTH_GID_NONE)
4814                 VATTR_SET(&va, va_gid, uap->gid);
4815
4816         xsecdst = NULL;
4817         switch(uap->xsecurity) {
4818                 /* explicit remove request */
4819         case CAST_USER_ADDR_T((void *)1):       /* _FILESEC_REMOVE_ACL */
4820                 VATTR_SET(&va, va_acl, NULL);
4821                 break;
4822                 /* not being set */
4823         case USER_ADDR_NULL:
4824                 break;
4825         default:
4826                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4827                         return(error);
4828                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4829                 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
4830         }
4831
4832         error = chmod1(vfs_context_current(), uap->path, &va);
4833
4834         if (xsecdst != NULL)
4835                 kauth_filesec_free(xsecdst);
4836         return(error);
4837 }
4838
4839 /*
4840  * Returns:     0                       Success
4841  *              chmod1:???              [anything chmod1 can return]
4842  */
4843 int
4844 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
4845 {
4846         struct vnode_attr va;
4847
4848         VATTR_INIT(&va);
4849         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4850
4851         return(chmod1(vfs_context_current(), uap->path, &va));
4852 }
4853
4854 /*
4855  * Change mode of a file given a file descriptor.
4856  */
4857 static int
4858 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
4859 {
4860         vnode_t vp;
4861         int error;
4862
4863         AUDIT_ARG(fd, fd);
4864
4865         if ((error = file_vnode(fd, &vp)) != 0)
4866                 return (error);
4867         if ((error = vnode_getwithref(vp)) != 0) {
4868                 file_drop(fd);
4869                 return(error);
4870         }
4871         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4872
4873         error = chmod2(vfs_context_current(), vp, vap);
4874         (void)vnode_put(vp);
4875         file_drop(fd);
4876
4877         return (error);
4878 }
4879
4880 /*
4881  * fchmod_extended: Change mode of a file given a file descriptor; with
4882  * extended argument list (including extended security (ACL)).
4883  *
4884  * Parameters:    p                       Process requesting to change file mode
4885  *                uap                     User argument descriptor (see below)
4886  *                retval                  (ignored)
4887  *
4888  * Indirect:      uap->mode               File mode to set (same as 'chmod')
4889  *                uap->uid                UID to set
4890  *                uap->gid                GID to set
4891  *                uap->xsecurity          ACL to set (or delete)
4892  *                uap->fd                 File descriptor of file to change mode
4893  *
4894  * Returns:        0                      Success
4895  *                !0                      errno value
4896  *
4897  */
4898 int
4899 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
4900 {
4901         int error;
4902         struct vnode_attr va;
4903         kauth_filesec_t xsecdst;
4904
4905         AUDIT_ARG(owner, uap->uid, uap->gid);
4906
4907         VATTR_INIT(&va);
4908         if (uap->mode != -1)
4909                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4910         if (uap->uid != KAUTH_UID_NONE)
4911                 VATTR_SET(&va, va_uid, uap->uid);
4912         if (uap->gid != KAUTH_GID_NONE)
4913                 VATTR_SET(&va, va_gid, uap->gid);
4914
4915         xsecdst = NULL;
4916         switch(uap->xsecurity) {
4917         case USER_ADDR_NULL:
4918                 VATTR_SET(&va, va_acl, NULL);
4919                 break;
4920         case CAST_USER_ADDR_T(-1):
4921                 break;
4922         default:
4923                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4924                         return(error);
4925                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4926         }
4927
4928         error = fchmod1(p, uap->fd, &va);
4929
4930
4931         switch(uap->xsecurity) {
4932         case USER_ADDR_NULL:
4933         case CAST_USER_ADDR_T(-1):
4934                 break;
4935         default:
4936                 if (xsecdst != NULL)
4937                         kauth_filesec_free(xsecdst);
4938         }
4939         return(error);
4940 }
4941
4942 int
4943 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
4944 {
4945         struct vnode_attr va;
4946
4947         VATTR_INIT(&va);
4948         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4949
4950         return(fchmod1(p, uap->fd, &va));
4951 }
4952
4953
4954 /*
4955  * Set ownership given a path name.
4956  */
4957 /* ARGSUSED */
4958 static int
4959 chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow)
4960 {
4961         vnode_t vp;
4962         struct vnode_attr va;
4963         int error;
4964         struct nameidata nd;
4965         kauth_action_t action;
4966
4967         AUDIT_ARG(owner, uap->uid, uap->gid);
4968
4969         NDINIT(&nd, LOOKUP, (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
4970                 UIO_USERSPACE, uap->path, ctx);
4971         error = namei(&nd);
4972         if (error)
4973                 return (error);
4974         vp = nd.ni_vp;
4975
4976         nameidone(&nd);
4977
4978         VATTR_INIT(&va);
4979         if (uap->uid != VNOVAL)
4980                 VATTR_SET(&va, va_uid, uap->uid);
4981         if (uap->gid != VNOVAL)
4982                 VATTR_SET(&va, va_gid, uap->gid);
4983
4984 #if CONFIG_MACF
4985         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4986         if (error)
4987                 goto out;
4988 #endif
4989
4990         /* preflight and authorize attribute changes */
4991         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4992                 goto out;
4993         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4994                 goto out;
4995         error = vnode_setattr(vp, &va, ctx);
4996
4997 out:
4998         /*
4999          * EACCES is only allowed from namei(); permissions failure should
5000          * return EPERM, so we need to translate the error code.
5001          */
5002         if (error == EACCES)
5003                 error = EPERM;
5004
5005         vnode_put(vp);
5006         return (error);
5007 }
5008
5009 int
5010 chown(__unused proc_t p, struct chown_args *uap, int32_t *retval)
5011 {
5012         return chown1(vfs_context_current(), uap, retval, 1);
5013 }
5014
5015 int
5016 lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval)
5017 {
5018         /* Argument list identical, but machine generated; cast for chown1() */
5019         return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
5020 }
5021
5022 /*
5023  * Set ownership given a file descriptor.
5024  */
5025 /* ARGSUSED */
5026 int
5027 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
5028 {
5029         struct vnode_attr va;
5030         vfs_context_t ctx = vfs_context_current();
5031         vnode_t vp;
5032         int error;
5033         kauth_action_t action;
5034
5035         AUDIT_ARG(owner, uap->uid, uap->gid);
5036         AUDIT_ARG(fd, uap->fd);
5037
5038         if ( (error = file_vnode(uap->fd, &vp)) )
5039                 return (error);
5040
5041         if ( (error = vnode_getwithref(vp)) ) {
5042                 file_drop(uap->fd);
5043                 return(error);
5044         }
5045         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5046
5047         VATTR_INIT(&va);
5048         if (uap->uid != VNOVAL)
5049                 VATTR_SET(&va, va_uid, uap->uid);
5050         if (uap->gid != VNOVAL)
5051                 VATTR_SET(&va, va_gid, uap->gid);
5052
5053 #if NAMEDSTREAMS
5054         /* chown calls are not allowed for resource forks. */
5055         if (vp->v_flag & VISNAMEDSTREAM) {
5056                 error = EPERM;
5057                 goto out;
5058         }
5059 #endif
5060
5061 #if CONFIG_MACF
5062         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5063         if (error)
5064                 goto out;
5065 #endif
5066
5067         /* preflight and authorize attribute changes */
5068         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5069                 goto out;
5070         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5071                 if (error == EACCES)
5072                         error = EPERM;
5073                 goto out;
5074         }
5075         error = vnode_setattr(vp, &va, ctx);
5076
5077 out:
5078         (void)vnode_put(vp);
5079         file_drop(uap->fd);
5080         return (error);
5081 }
5082
5083 static int
5084 getutimes(user_addr_t usrtvp, struct timespec *tsp)
5085 {
5086         int error;
5087
5088         if (usrtvp == USER_ADDR_NULL) {
5089                 struct timeval old_tv;
5090                 /* XXX Y2038 bug because of microtime argument */
5091                 microtime(&old_tv);
5092                 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
5093                 tsp[1] = tsp[0];
5094         } else {
5095                 if (IS_64BIT_PROCESS(current_proc())) {
5096                         struct user64_timeval tv[2];
5097                         error = copyin(usrtvp, (void *)tv, sizeof(tv));
5098                         if (error)
5099                                 return (error);
5100                         TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5101                         TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5102                 } else {
5103                         struct user32_timeval tv[2];
5104                         error = copyin(usrtvp, (void *)tv, sizeof(tv));
5105                         if (error)
5106                                 return (error);
5107                         TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5108                         TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5109                 }
5110         }
5111         return 0;
5112 }
5113
5114 static int
5115 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
5116         int nullflag)
5117 {
5118         int error;
5119         struct vnode_attr va;
5120         kauth_action_t action;
5121
5122         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5123
5124         VATTR_INIT(&va);
5125         VATTR_SET(&va, va_access_time, ts[0]);
5126         VATTR_SET(&va, va_modify_time, ts[1]);
5127         if (nullflag)
5128                 va.va_vaflags |= VA_UTIMES_NULL;
5129
5130 #if NAMEDSTREAMS
5131         /* utimes calls are not allowed for resource forks. */
5132         if (vp->v_flag & VISNAMEDSTREAM) {
5133                 error = EPERM;
5134                 goto out;
5135         }
5136 #endif
5137
5138 #if CONFIG_MACF
5139         error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
5140         if (error)
5141                 goto out;
5142 #endif
5143         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
5144                 if (!nullflag && error == EACCES)
5145                         error = EPERM;
5146                 goto out;
5147         }
5148
5149         /* since we may not need to auth anything, check here */
5150         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5151                 if (!nullflag && error == EACCES)
5152                         error = EPERM;
5153                 goto out;
5154         }
5155         error = vnode_setattr(vp, &va, ctx);
5156
5157 out:
5158         return error;
5159 }
5160
5161 /*
5162  * Set the access and modification times of a file.
5163  */
5164 /* ARGSUSED */
5165 int
5166 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
5167 {
5168         struct timespec ts[2];
5169         user_addr_t usrtvp;
5170         int error;
5171         struct nameidata nd;
5172         vfs_context_t ctx = vfs_context_current();
5173
5174         /*
5175          * AUDIT: Needed to change the order of operations to do the
5176          * name lookup first because auditing wants the path.
5177          */
5178         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
5179                 UIO_USERSPACE, uap->path, ctx);
5180         error = namei(&nd);
5181         if (error)
5182                 return (error);
5183         nameidone(&nd);
5184
5185         /*
5186          * Fetch the user-supplied time.  If usrtvp is USER_ADDR_NULL, we fetch
5187          * the current time instead.
5188          */
5189         usrtvp = uap->tptr;
5190         if ((error = getutimes(usrtvp, ts)) != 0)
5191                 goto out;
5192
5193         error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
5194
5195 out:
5196         vnode_put(nd.ni_vp);
5197         return (error);
5198 }
5199
5200 /*
5201  * Set the access and modification times of a file.
5202  */
5203 /* ARGSUSED */
5204 int
5205 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
5206 {
5207         struct timespec ts[2];
5208         vnode_t vp;
5209         user_addr_t usrtvp;
5210         int error;
5211
5212         AUDIT_ARG(fd, uap->fd);
5213         usrtvp = uap->tptr;
5214         if ((error = getutimes(usrtvp, ts)) != 0)
5215                 return (error);
5216         if ((error = file_vnode(uap->fd, &vp)) != 0)
5217                 return (error);
5218         if((error = vnode_getwithref(vp))) {
5219                 file_drop(uap->fd);
5220                 return(error);
5221         }
5222
5223         error =  setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
5224         vnode_put(vp);
5225         file_drop(uap->fd);
5226         return(error);
5227 }
5228
5229 /*
5230  * Truncate a file given its path name.
5231  */
5232 /* ARGSUSED */
5233 int
5234 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
5235 {
5236         vnode_t vp;
5237         struct vnode_attr va;
5238         vfs_context_t ctx = vfs_context_current();
5239         int error;
5240         struct nameidata nd;
5241         kauth_action_t action;
5242
5243         if (uap->length < 0)
5244                 return(EINVAL);
5245         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
5246                 UIO_USERSPACE, uap->path, ctx);
5247         if ((error = namei(&nd)))
5248                 return (error);
5249         vp = nd.ni_vp;
5250
5251         nameidone(&nd);
5252
5253         VATTR_INIT(&va);
5254         VATTR_SET(&va, va_data_size, uap->length);
5255
5256 #if CONFIG_MACF
5257         error = mac_vnode_check_truncate(ctx, NOCRED, vp);
5258         if (error)
5259                 goto out;
5260 #endif
5261
5262         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5263                 goto out;
5264         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5265                 goto out;
5266         error = vnode_setattr(vp, &va, ctx);
5267 out:
5268         vnode_put(vp);
5269         return (error);
5270 }
5271
5272 /*
5273  * Truncate a file given a file descriptor.
5274  */
5275 /* ARGSUSED */
5276 int
5277 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
5278 {
5279         vfs_context_t ctx = vfs_context_current();
5280         struct vnode_attr va;
5281         vnode_t vp;
5282         struct fileproc *fp;
5283         int error ;
5284         int fd = uap->fd;
5285
5286         AUDIT_ARG(fd, uap->fd);
5287         if (uap->length < 0)
5288                 return(EINVAL);
5289
5290         if ( (error = fp_lookup(p,fd,&fp,0)) ) {
5291                 return(error);
5292         }
5293
5294         if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
5295                 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
5296                 goto out;
5297         }
5298         if (fp->f_fglob->fg_type != DTYPE_VNODE)  {
5299                 error = EINVAL;
5300                 goto out;
5301         }
5302
5303         vp = (vnode_t)fp->f_fglob->fg_data;
5304
5305         if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
5306                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5307                 error = EINVAL;
5308                 goto out;
5309         }
5310
5311         if ((error = vnode_getwithref(vp)) != 0) {
5312                 goto out;
5313         }
5314
5315         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5316
5317 #if CONFIG_MACF
5318         error = mac_vnode_check_truncate(ctx,
5319             fp->f_fglob->fg_cred, vp);
5320         if (error) {
5321                 (void)vnode_put(vp);
5322                 goto out;
5323         }
5324 #endif
5325         VATTR_INIT(&va);
5326         VATTR_SET(&va, va_data_size, uap->length);
5327         error = vnode_setattr(vp, &va, ctx);
5328         (void)vnode_put(vp);
5329 out:
5330         file_drop(fd);
5331         return (error);
5332 }
5333
5334
5335 /*
5336  * Sync an open file with synchronized I/O _file_ integrity completion
5337  */
5338 /* ARGSUSED */
5339 int
5340 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
5341 {
5342         __pthread_testcancel(1);
5343         return(fsync_common(p, uap, MNT_WAIT));
5344 }
5345
5346
5347 /*
5348  * Sync an open file with synchronized I/O _file_ integrity completion
5349  *
5350  * Notes:       This is a legacy support function that does not test for
5351  *              thread cancellation points.
5352  */
5353 /* ARGSUSED */
5354 int
5355 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
5356 {
5357         return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
5358 }
5359
5360
5361 /*
5362  * Sync an open file with synchronized I/O _data_ integrity completion
5363  */
5364 /* ARGSUSED */
5365 int
5366 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
5367 {
5368         __pthread_testcancel(1);
5369         return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
5370 }
5371
5372
5373 /*
5374  * fsync_common
5375  *
5376  * Common fsync code to support both synchronized I/O file integrity completion
5377  * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
5378  *
5379  * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
5380  * will only guarantee that the file data contents are retrievable.  If
5381  * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
5382  * includes additional metadata unnecessary for retrieving the file data
5383  * contents, such as atime, mtime, ctime, etc., also be committed to stable
5384  * storage.
5385  *
5386  * Parameters:  p                               The process
5387  *              uap->fd                         The descriptor to synchronize
5388  *              flags                           The data integrity flags
5389  *
5390  * Returns:     int                             Success
5391  *      fp_getfvp:EBADF                         Bad file descriptor
5392  *      fp_getfvp:ENOTSUP                       fd does not refer to a vnode
5393  *      VNOP_FSYNC:???                          unspecified
5394  *
5395  * Notes:       We use struct fsync_args because it is a short name, and all
5396  *              caller argument structures are otherwise identical.
5397  */
5398 static int
5399 fsync_common(proc_t p, struct fsync_args *uap, int flags)
5400 {
5401         vnode_t vp;
5402         struct fileproc *fp;
5403         vfs_context_t ctx = vfs_context_current();
5404         int error;
5405
5406         AUDIT_ARG(fd, uap->fd);
5407
5408         if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
5409                 return (error);
5410         if ( (error = vnode_getwithref(vp)) ) {
5411                 file_drop(uap->fd);
5412                 return(error);
5413         }
5414
5415         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5416
5417         error = VNOP_FSYNC(vp, flags, ctx);
5418
5419 #if NAMEDRSRCFORK
5420         /* Sync resource fork shadow file if necessary. */
5421         if ((error == 0) &&
5422             (vp->v_flag & VISNAMEDSTREAM) &&
5423             (vp->v_parent != NULLVP) &&
5424             vnode_isshadow(vp) &&
5425             (fp->f_flags & FP_WRITTEN)) {
5426                 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
5427         }
5428 #endif
5429
5430         (void)vnode_put(vp);
5431         file_drop(uap->fd);
5432         return (error);
5433 }
5434
5435 /*
5436  * Duplicate files.  Source must be a file, target must be a file or
5437  * must not exist.
5438  *
5439  * XXX Copyfile authorisation checking is woefully inadequate, and will not
5440  *     perform inheritance correctly.
5441  */
5442 /* ARGSUSED */
5443 int
5444 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
5445 {
5446         vnode_t tvp, fvp, tdvp, sdvp;
5447         struct nameidata fromnd, tond;
5448         int error;
5449         vfs_context_t ctx = vfs_context_current();
5450
5451         /* Check that the flags are valid. */
5452
5453         if (uap->flags & ~CPF_MASK) {
5454                 return(EINVAL);
5455         }
5456
5457         NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1,
5458                 UIO_USERSPACE, uap->from, ctx);
5459         if ((error = namei(&fromnd)))
5460                 return (error);
5461         fvp = fromnd.ni_vp;
5462
5463         NDINIT(&tond, CREATE,  LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
5464             UIO_USERSPACE, uap->to, ctx);
5465         if ((error = namei(&tond))) {
5466                 goto out1;
5467         }
5468         tdvp = tond.ni_dvp;
5469         tvp = tond.ni_vp;
5470
5471         if (tvp != NULL) {
5472                 if (!(uap->flags & CPF_OVERWRITE)) {
5473                         error = EEXIST;
5474                         goto out;
5475                 }
5476         }
5477         if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
5478                 error = EISDIR;
5479                 goto out;
5480         }
5481
5482         if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5483                 goto out;
5484
5485         if (fvp == tdvp)
5486                 error = EINVAL;
5487         /*
5488          * If source is the same as the destination (that is the
5489          * same inode number) then there is nothing to do.
5490          * (fixed to have POSIX semantics - CSM 3/2/98)
5491          */
5492         if (fvp == tvp)
5493                 error = -1;
5494         if (!error)
5495                 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
5496 out:
5497         sdvp = tond.ni_startdir;
5498         /*
5499          * nameidone has to happen before we vnode_put(tdvp)
5500          * since it may need to release the fs_nodelock on the tdvp
5501          */
5502         nameidone(&tond);
5503
5504         if (tvp)
5505                 vnode_put(tvp);
5506         vnode_put(tdvp);
5507         vnode_put(sdvp);
5508 out1:
5509         vnode_put(fvp);
5510
5511         if (fromnd.ni_startdir)
5512                 vnode_put(fromnd.ni_startdir);
5513         nameidone(&fromnd);
5514
5515         if (error == -1)
5516                 return (0);
5517         return (error);
5518 }
5519
5520
5521 /*
5522  * Rename files.  Source and destination must either both be directories,
5523  * or both not be directories.  If target is a directory, it must be empty.
5524  */
5525 /* ARGSUSED */
5526 int
5527 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
5528 {
5529         vnode_t tvp, tdvp;
5530         vnode_t fvp, fdvp;
5531         struct nameidata fromnd, tond;
5532         vfs_context_t ctx = vfs_context_current();
5533         int error;
5534         int do_retry;
5535         int mntrename;
5536         int need_event;
5537         const char *oname;
5538         char *from_name = NULL, *to_name = NULL;
5539         int from_len=0, to_len=0;
5540         int holding_mntlock;
5541         mount_t locked_mp = NULL;
5542         vnode_t oparent;
5543 #if CONFIG_FSE
5544         fse_info from_finfo, to_finfo;
5545 #endif
5546         int from_truncated=0, to_truncated;
5547
5548         holding_mntlock = 0;
5549     do_retry = 0;
5550 retry:
5551         fvp = tvp = NULL;
5552         fdvp = tdvp = NULL;
5553         mntrename = FALSE;
5554
5555         NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx);
5556
5557         if ( (error = namei(&fromnd)) )
5558                 goto out1;
5559         fdvp = fromnd.ni_dvp;
5560         fvp  = fromnd.ni_vp;
5561
5562 #if CONFIG_MACF
5563         error = mac_vnode_check_rename_from(ctx, fdvp, fvp, &fromnd.ni_cnd);
5564         if (error)
5565                 goto out1;
5566 #endif
5567
5568         NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK , UIO_USERSPACE, uap->to, ctx);
5569         if (fvp->v_type == VDIR)
5570                 tond.ni_cnd.cn_flags |= WILLBEDIR;
5571
5572         if ( (error = namei(&tond)) ) {
5573                 /*
5574                  * Translate error code for rename("dir1", "dir2/.").
5575                  */
5576                 if (error == EISDIR && fvp->v_type == VDIR)
5577                         error = EINVAL;
5578                 goto out1;
5579         }
5580         tdvp = tond.ni_dvp;
5581         tvp  = tond.ni_vp;
5582
5583 #if CONFIG_MACF
5584         error = mac_vnode_check_rename_to(ctx,
5585             tdvp, tvp, fdvp == tdvp, &tond.ni_cnd);
5586         if (error)
5587                 goto out1;
5588 #endif
5589
5590         if (tvp != NULL) {
5591                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
5592                         error = ENOTDIR;
5593                         goto out1;
5594                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
5595                         error = EISDIR;
5596                         goto out1;
5597                 }
5598         }
5599         if (fvp == tdvp) {
5600                 error = EINVAL;
5601                 goto out1;
5602         }
5603         /*
5604          * If the source and destination are the same (i.e. they're
5605          * links to the same vnode) and the target file system is
5606          * case sensitive, then there is nothing to do.
5607          */
5608         if (fvp == tvp) {
5609                 int pathconf_val;
5610
5611                 /*
5612                  * Note: if _PC_CASE_SENSITIVE selector isn't supported,
5613                  * then assume that this file system is case sensitive.
5614                  */
5615                 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
5616                     pathconf_val != 0) {
5617                         goto out1;
5618                 }
5619         }
5620
5621         /*
5622          * Authorization.
5623          *
5624          * If tvp is a directory and not the same as fdvp, or tdvp is not
5625          * the same as fdvp, the node is moving between directories and we
5626          * need rights to remove from the old and add to the new.
5627          *
5628          * If tvp already exists and is not a directory, we need to be
5629          * allowed to delete it.
5630          *
5631          * Note that we do not inherit when renaming.
5632          *
5633          * XXX This needs to be revisited to implement the deferred-inherit bit
5634          */
5635         {
5636                 int moving = 0;
5637
5638                 error = 0;
5639                 if ((tvp != NULL) && vnode_isdir(tvp)) {
5640                         if (tvp != fdvp)
5641                                 moving = 1;
5642                 } else if (tdvp != fdvp) {
5643                         moving = 1;
5644                 }
5645                 /*
5646                  * must have delete rights to remove the old name even in
5647                  * the simple case of fdvp == tdvp.
5648                  *
5649                  * If fvp is a directory, and we are changing it's parent,
5650                  * then we also need rights to rewrite its ".." entry as well.
5651                  */
5652                 if (vnode_isdir(fvp)) {
5653                         if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
5654                                 goto auth_exit;
5655                 } else {
5656                 if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
5657                         goto auth_exit;
5658                 }
5659                 if (moving) {
5660                         /* moving into tdvp or tvp, must have rights to add */
5661                         if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
5662                                  NULL,
5663                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
5664                                  ctx)) != 0) {
5665                 /*
5666                  * We could encounter a race where after doing the namei, tvp stops
5667                  * being valid. If so, simply re-drive the rename call from the
5668                  * top.
5669                  */
5670                  if (error == ENOENT) {
5671                      do_retry = 1;
5672                  }
5673                                 goto auth_exit;
5674                         }
5675                 } else {
5676                         /* node staying in same directory, must be allowed to add new name */
5677                         if ((error = vnode_authorize(fdvp, NULL,
5678                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5679                                 goto auth_exit;
5680                 }
5681                 /* overwriting tvp */
5682                 if ((tvp != NULL) && !vnode_isdir(tvp) &&
5683                     ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
5684             /*
5685              * We could encounter a race where after doing the namei, tvp stops
5686              * being valid. If so, simply re-drive the rename call from the
5687              * top.
5688              */
5689             if (error == ENOENT) {
5690                 do_retry = 1;
5691             }
5692                         goto auth_exit;
5693                 }
5694
5695                 /* XXX more checks? */
5696
5697 auth_exit:
5698                 /* authorization denied */
5699                 if (error != 0)
5700                         goto out1;
5701         }
5702         /*
5703          * Allow the renaming of mount points.
5704          * - target must not exist
5705          * - target must reside in the same directory as source
5706          * - union mounts cannot be renamed
5707          * - "/" cannot be renamed
5708          */
5709         if ((fvp->v_flag & VROOT) &&
5710             (fvp->v_type == VDIR) &&
5711             (tvp == NULL)  &&
5712             (fvp->v_mountedhere == NULL)  &&
5713             (fdvp == tdvp)  &&
5714             ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
5715             (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
5716                 vnode_t coveredvp;
5717
5718                 /* switch fvp to the covered vnode */
5719                 coveredvp = fvp->v_mount->mnt_vnodecovered;
5720                 if ( (vnode_getwithref(coveredvp)) ) {
5721                         error = ENOENT;
5722                         goto out1;
5723                 }
5724                 vnode_put(fvp);
5725
5726                 fvp = coveredvp;
5727                 mntrename = TRUE;
5728         }
5729         /*
5730          * Check for cross-device rename.
5731          */
5732         if ((fvp->v_mount != tdvp->v_mount) ||
5733             (tvp && (fvp->v_mount != tvp->v_mount))) {
5734                 error = EXDEV;
5735                 goto out1;
5736         }
5737         /*
5738          * Avoid renaming "." and "..".
5739          */
5740         if (fvp->v_type == VDIR &&
5741             ((fdvp == fvp) ||
5742              (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') ||
5743              ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) {
5744                 error = EINVAL;
5745                 goto out1;
5746         }
5747         /*
5748          * The following edge case is caught here:
5749          * (to cannot be a descendent of from)
5750          *
5751          *       o fdvp
5752          *      /
5753          *     /
5754          *    o fvp
5755          *     \
5756          *      \
5757          *       o tdvp
5758          *      /
5759          *     /
5760          *    o tvp
5761          */
5762         if (tdvp->v_parent == fvp) {
5763                 error = EINVAL;
5764                 goto out1;
5765         }
5766
5767         /*
5768          * If source is the same as the destination (that is the
5769          * same inode number) then there is nothing to do...
5770          * EXCEPT if the underlying file system supports case
5771          * insensitivity and is case preserving.  In this case
5772          * the file system needs to handle the special case of
5773          * getting the same vnode as target (fvp) and source (tvp).
5774          *
5775          * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
5776          * and _PC_CASE_PRESERVING can have this exception, and they need to
5777          * handle the special case of getting the same vnode as target and
5778          * source.  NOTE: Then the target is unlocked going into vnop_rename,
5779          * so not to cause locking problems. There is a single reference on tvp.
5780          *
5781          * NOTE - that fvp == tvp also occurs if they are hard linked and
5782          * that correct behaviour then is just to return success without doing
5783          * anything.
5784          */
5785         if (fvp == tvp && fdvp == tdvp) {
5786                 if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
5787                     !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
5788                           fromnd.ni_cnd.cn_namelen)) {
5789                         goto out1;
5790                 }
5791         }
5792
5793         if (holding_mntlock && fvp->v_mount != locked_mp) {
5794                 /*
5795                  * we're holding a reference and lock
5796                  * on locked_mp, but it no longer matches
5797                  * what we want to do... so drop our hold
5798                  */
5799                 mount_unlock_renames(locked_mp);
5800                 mount_drop(locked_mp, 0);
5801                 holding_mntlock = 0;
5802         }
5803         if (tdvp != fdvp && fvp->v_type == VDIR) {
5804                 /*
5805                  * serialize renames that re-shape
5806                  * the tree... if holding_mntlock is
5807                  * set, then we're ready to go...
5808                  * otherwise we
5809                  * first need to drop the iocounts
5810                  * we picked up, second take the
5811                  * lock to serialize the access,
5812                  * then finally start the lookup
5813                  * process over with the lock held
5814                  */
5815                 if (!holding_mntlock) {
5816                         /*
5817                          * need to grab a reference on
5818                          * the mount point before we
5819                          * drop all the iocounts... once
5820                          * the iocounts are gone, the mount
5821                          * could follow
5822                          */
5823                         locked_mp = fvp->v_mount;
5824                         mount_ref(locked_mp, 0);
5825
5826                         /*
5827                          * nameidone has to happen before we vnode_put(tvp)
5828                          * since it may need to release the fs_nodelock on the tvp
5829                          */
5830                         nameidone(&tond);
5831
5832                         if (tvp)
5833                                 vnode_put(tvp);
5834                         vnode_put(tdvp);
5835
5836                         /*
5837                          * nameidone has to happen before we vnode_put(fdvp)
5838                          * since it may need to release the fs_nodelock on the fvp
5839                          */
5840                         nameidone(&fromnd);
5841
5842                         vnode_put(fvp);
5843                         vnode_put(fdvp);
5844
5845                         mount_lock_renames(locked_mp);
5846                         holding_mntlock = 1;
5847
5848                         goto retry;
5849                 }
5850         } else {
5851                 /*
5852                  * when we dropped the iocounts to take
5853                  * the lock, we allowed the identity of
5854                  * the various vnodes to change... if they did,
5855                  * we may no longer be dealing with a rename
5856                  * that reshapes the tree... once we're holding
5857                  * the iocounts, the vnodes can't change type
5858                  * so we're free to drop the lock at this point
5859                  * and continue on
5860                  */
5861                 if (holding_mntlock) {
5862                         mount_unlock_renames(locked_mp);
5863                         mount_drop(locked_mp, 0);
5864                         holding_mntlock = 0;
5865                 }
5866         }
5867         // save these off so we can later verify that fvp is the same
5868         oname   = fvp->v_name;
5869         oparent = fvp->v_parent;
5870
5871 #if CONFIG_FSE
5872         need_event = need_fsevent(FSE_RENAME, fvp);
5873         if (need_event) {
5874                 get_fse_info(fvp, &from_finfo, ctx);
5875
5876                 if (tvp) {
5877                         get_fse_info(tvp, &to_finfo, ctx);
5878                 }
5879         }
5880 #else
5881         need_event = 0;
5882 #endif /* CONFIG_FSE */
5883
5884         if (need_event || kauth_authorize_fileop_has_listeners()) {
5885                 GET_PATH(from_name);
5886                 if (from_name == NULL) {
5887                         error = ENOMEM;
5888                         goto out1;
5889                 }
5890
5891                 from_len = safe_getpath(fdvp, fromnd.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
5892
5893                 GET_PATH(to_name);
5894                 if (to_name == NULL) {
5895                         error = ENOMEM;
5896                         goto out1;
5897                 }
5898
5899                 to_len = safe_getpath(tdvp, tond.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
5900         }
5901
5902         error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd,
5903                             tdvp, tvp, &tond.ni_cnd,
5904                             ctx);
5905
5906         if (holding_mntlock) {
5907                 /*
5908                  * we can drop our serialization
5909                  * lock now
5910                  */
5911                 mount_unlock_renames(locked_mp);
5912                 mount_drop(locked_mp, 0);
5913                 holding_mntlock = 0;
5914         }
5915         if (error) {
5916         /*
5917          * We may encounter a race in the VNOP where the destination didn't
5918          * exist when we did the namei, but it does by the time we go and
5919          * try to create the entry. In this case, we should re-drive this rename
5920          * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
5921                  * but other filesystems susceptible to this race could return it, too.
5922          */
5923         if (error == ERECYCLE) {
5924             do_retry = 1;
5925         }
5926
5927                 goto out1;
5928         }
5929
5930         /* call out to allow 3rd party notification of rename.
5931          * Ignore result of kauth_authorize_fileop call.
5932          */
5933         kauth_authorize_fileop(vfs_context_ucred(ctx),
5934                         KAUTH_FILEOP_RENAME,
5935                         (uintptr_t)from_name, (uintptr_t)to_name);
5936
5937 #if CONFIG_FSE
5938         if (from_name != NULL && to_name != NULL) {
5939                 if (from_truncated || to_truncated) {
5940                         // set it here since only the from_finfo gets reported up to user space
5941                         from_finfo.mode |= FSE_TRUNCATED_PATH;
5942                 }
5943                 if (tvp) {
5944                         add_fsevent(FSE_RENAME, ctx,
5945                                     FSE_ARG_STRING, from_len, from_name,
5946                                     FSE_ARG_FINFO, &from_finfo,
5947                                     FSE_ARG_STRING, to_len, to_name,
5948                                     FSE_ARG_FINFO, &to_finfo,
5949                                     FSE_ARG_DONE);
5950                 } else {
5951                         add_fsevent(FSE_RENAME, ctx,
5952                                     FSE_ARG_STRING, from_len, from_name,
5953                                     FSE_ARG_FINFO, &from_finfo,
5954                                     FSE_ARG_STRING, to_len, to_name,
5955                                     FSE_ARG_DONE);
5956                 }
5957         }
5958 #endif /* CONFIG_FSE */
5959
5960         /*
5961          * update filesystem's mount point data
5962          */
5963         if (mntrename) {
5964                 char *cp, *pathend, *mpname;
5965                 char * tobuf;
5966                 struct mount *mp;
5967                 int maxlen;
5968                 size_t len = 0;
5969
5970                 mp = fvp->v_mountedhere;
5971
5972                 if (vfs_busy(mp, LK_NOWAIT)) {
5973                         error = EBUSY;
5974                         goto out1;
5975                 }
5976                 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
5977
5978                 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
5979                 if (!error) {
5980                         /* find current mount point prefix */
5981                         pathend = &mp->mnt_vfsstat.f_mntonname[0];
5982                         for (cp = pathend; *cp != '\0'; ++cp) {
5983                                 if (*cp == '/')
5984                                         pathend = cp + 1;
5985                         }
5986                         /* find last component of target name */
5987                         for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
5988                                 if (*cp == '/')
5989                                         mpname = cp + 1;
5990                         }
5991                         /* append name to prefix */
5992                         maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
5993                         bzero(pathend, maxlen);
5994                         strlcpy(pathend, mpname, maxlen);
5995                 }
5996                 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
5997
5998                 vfs_unbusy(mp);
5999         }
6000         /*
6001          * fix up name & parent pointers.  note that we first
6002          * check that fvp has the same name/parent pointers it
6003          * had before the rename call... this is a 'weak' check
6004          * at best...
6005          */
6006         if (oname == fvp->v_name && oparent == fvp->v_parent) {
6007                 int update_flags;
6008
6009                 update_flags = VNODE_UPDATE_NAME;
6010
6011                 if (fdvp != tdvp)
6012                         update_flags |= VNODE_UPDATE_PARENT;
6013
6014                 vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
6015         }
6016 out1:
6017         if (to_name != NULL) {
6018                 RELEASE_PATH(to_name);
6019                 to_name = NULL;
6020         }
6021         if (from_name != NULL) {
6022                 RELEASE_PATH(from_name);
6023                 from_name = NULL;
6024         }
6025         if (holding_mntlock) {
6026                 mount_unlock_renames(locked_mp);
6027                 mount_drop(locked_mp, 0);
6028                 holding_mntlock = 0;
6029         }
6030         if (tdvp) {
6031                 /*
6032                  * nameidone has to happen before we vnode_put(tdvp)
6033                  * since it may need to release the fs_nodelock on the tdvp
6034                  */
6035                 nameidone(&tond);
6036
6037                 if (tvp)
6038                         vnode_put(tvp);
6039                 vnode_put(tdvp);
6040         }
6041         if (fdvp) {
6042                 /*
6043                  * nameidone has to happen before we vnode_put(fdvp)
6044                  * since it may need to release the fs_nodelock on the fdvp
6045                  */
6046                 nameidone(&fromnd);
6047
6048                 if (fvp)
6049                         vnode_put(fvp);
6050                 vnode_put(fdvp);
6051         }
6052
6053     /*
6054      * If things changed after we did the namei, then we will re-drive
6055      * this rename call from the top.
6056      */
6057         if(do_retry) {
6058         do_retry = 0;
6059                 goto retry;
6060         }
6061
6062         return (error);
6063 }
6064
6065 /*
6066  * Make a directory file.
6067  *
6068  * Returns:     0                       Success
6069  *              EEXIST
6070  *      namei:???
6071  *      vnode_authorize:???
6072  *      vn_create:???
6073  */
6074 /* ARGSUSED */
6075 static int
6076 mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
6077 {
6078         vnode_t vp, dvp;
6079         int error;
6080         int update_flags = 0;
6081         struct nameidata nd;
6082
6083         AUDIT_ARG(mode, vap->va_mode);
6084         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
6085                 UIO_USERSPACE, path, ctx);
6086         nd.ni_cnd.cn_flags |= WILLBEDIR;
6087         error = namei(&nd);
6088         if (error)
6089                 return (error);
6090         dvp = nd.ni_dvp;
6091         vp = nd.ni_vp;
6092
6093         if (vp != NULL) {
6094                 error = EEXIST;
6095                 goto out;
6096         }
6097
6098         VATTR_SET(vap, va_type, VDIR);
6099
6100 #if CONFIG_MACF
6101         error = mac_vnode_check_create(ctx,
6102             nd.ni_dvp, &nd.ni_cnd, vap);
6103         if (error)
6104                 goto out;
6105 #endif
6106
6107         /* authorize addition of a directory to the parent */
6108         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
6109                 goto out;
6110
6111
6112         /* make the directory */
6113         if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0)
6114                 goto out;
6115
6116         // Make sure the name & parent pointers are hooked up
6117         if (vp->v_name == NULL)
6118                 update_flags |= VNODE_UPDATE_NAME;
6119         if (vp->v_parent == NULLVP)
6120                 update_flags |= VNODE_UPDATE_PARENT;
6121
6122         if (update_flags)
6123                 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
6124
6125 #if CONFIG_FSE
6126         add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
6127 #endif
6128
6129 out:
6130         /*
6131          * nameidone has to happen before we vnode_put(dvp)
6132          * since it may need to release the fs_nodelock on the dvp
6133          */
6134         nameidone(&nd);
6135
6136         if (vp)
6137                 vnode_put(vp);
6138         vnode_put(dvp);
6139
6140         return (error);
6141 }
6142
6143 /*
6144  * mkdir_extended: Create a directory; with extended security (ACL).
6145  *
6146  * Parameters:    p                       Process requesting to create the directory
6147  *                uap                     User argument descriptor (see below)
6148  *                retval                  (ignored)
6149  *
6150  * Indirect:      uap->path               Path of directory to create
6151  *                uap->mode               Access permissions to set
6152  *                uap->xsecurity          ACL to set
6153  *
6154  * Returns:        0                      Success
6155  *                !0                      Not success
6156  *
6157  */
6158 int
6159 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
6160 {
6161         int ciferror;
6162         kauth_filesec_t xsecdst;
6163         struct vnode_attr va;
6164
6165         AUDIT_ARG(owner, uap->uid, uap->gid);
6166
6167         xsecdst = NULL;
6168         if ((uap->xsecurity != USER_ADDR_NULL) &&
6169             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
6170                 return ciferror;
6171
6172         VATTR_INIT(&va);
6173         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6174         if (xsecdst != NULL)
6175                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6176
6177         ciferror = mkdir1(vfs_context_current(), uap->path, &va);
6178         if (xsecdst != NULL)
6179                 kauth_filesec_free(xsecdst);
6180         return ciferror;
6181 }
6182
6183 int
6184 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
6185 {
6186         struct vnode_attr va;
6187
6188         VATTR_INIT(&va);
6189         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6190
6191         return(mkdir1(vfs_context_current(), uap->path, &va));
6192 }
6193
6194 /*
6195  * Remove a directory file.
6196  */
6197 /* ARGSUSED */
6198 int
6199 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
6200 {
6201         vnode_t vp, dvp;
6202         int error;
6203         struct nameidata nd;
6204         vfs_context_t ctx = vfs_context_current();
6205
6206         int restart_flag;
6207         uint32_t oldvp_id = UINT32_MAX;
6208
6209         /*
6210          * This loop exists to restart rmdir in the unlikely case that two
6211          * processes are simultaneously trying to remove the same directory
6212          * containing orphaned appleDouble files.
6213          */
6214         do {
6215                 restart_flag = 0;
6216
6217                 NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1,
6218                                 UIO_USERSPACE, uap->path, ctx);
6219                 error = namei(&nd);
6220                 if (error)
6221                         return (error);
6222
6223                 dvp = nd.ni_dvp;
6224                 vp = nd.ni_vp;
6225
6226
6227                 /*
6228                  * If being restarted check if the new vp
6229                  * still has the same v_id.
6230                  */
6231                 if (oldvp_id != UINT32_MAX && oldvp_id != vp->v_id) {
6232                         error = ENOENT;
6233                         goto out;
6234                 }
6235
6236                 if (vp->v_type != VDIR) {
6237                         /*
6238                          * rmdir only deals with directories
6239                          */
6240                         error = ENOTDIR;
6241                 } else if (dvp == vp) {
6242                         /*
6243                          * No rmdir "." please.
6244                          */
6245                         error = EINVAL;
6246                 } else if (vp->v_flag & VROOT) {
6247                         /*
6248                          * The root of a mounted filesystem cannot be deleted.
6249                          */
6250                         error = EBUSY;
6251                 } else {
6252 #if CONFIG_MACF
6253                         error = mac_vnode_check_unlink(ctx, dvp,
6254                                         vp, &nd.ni_cnd);
6255                         if (!error)
6256 #endif
6257                                 error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, ctx);
6258                 }
6259                 if (!error) {
6260                         char     *path = NULL;
6261                         int       len=0;
6262                         int has_listeners = 0;
6263                         int need_event = 0;
6264                         int truncated = 0;
6265 #if CONFIG_FSE
6266                         fse_info  finfo;
6267
6268                         need_event = need_fsevent(FSE_DELETE, dvp);
6269                         if (need_event) {
6270                                 get_fse_info(vp, &finfo, ctx);
6271                         }
6272 #endif
6273                         has_listeners = kauth_authorize_fileop_has_listeners();
6274                         if (need_event || has_listeners) {
6275                                 GET_PATH(path);
6276                                 if (path == NULL) {
6277                                         error = ENOMEM;
6278                                         goto out;
6279                                 }
6280
6281                                 len = safe_getpath(vp, NULL, path, MAXPATHLEN, &truncated);
6282 #if CONFIG_FSE
6283                                 if (truncated) {
6284                                         finfo.mode |= FSE_TRUNCATED_PATH;
6285                                 }
6286 #endif
6287                         }
6288
6289                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
6290
6291                         /*
6292                          * Special case to remove orphaned AppleDouble
6293                          * files. I don't like putting this in the kernel,
6294                          * but carbon does not like putting this in carbon either,
6295                          * so here we are.
6296                          */
6297                         if (error == ENOTEMPTY) {
6298                                 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
6299                                 if (error == EBUSY) {
6300                                         oldvp_id = vp->v_id;
6301                                         goto out;
6302                                 }
6303
6304
6305                                 /*
6306                                  * Assuming everything went well, we will try the RMDIR again
6307                                  */
6308                                 if (!error)
6309                                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
6310                         }
6311
6312                         /*
6313                          * Call out to allow 3rd party notification of delete.
6314                          * Ignore result of kauth_authorize_fileop call.
6315                          */
6316                         if (!error) {
6317                                 if (has_listeners) {
6318                                         kauth_authorize_fileop(vfs_context_ucred(ctx),
6319                                                         KAUTH_FILEOP_DELETE,
6320                                                         (uintptr_t)vp,
6321                                                         (uintptr_t)path);
6322                                 }
6323
6324                                 if (vp->v_flag & VISHARDLINK) {
6325                                     // see the comment in unlink1() about why we update
6326                                     // the parent of a hard link when it is removed
6327                                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
6328                                 }
6329
6330 #if CONFIG_FSE
6331                                 if (need_event) {
6332                                         add_fsevent(FSE_DELETE, ctx,
6333                                                         FSE_ARG_STRING, len, path,
6334                                                         FSE_ARG_FINFO, &finfo,
6335                                                         FSE_ARG_DONE);
6336                                 }
6337 #endif
6338                         }
6339                         if (path != NULL)
6340                                 RELEASE_PATH(path);
6341                 }
6342
6343 out:
6344                 /*
6345                  * nameidone has to happen before we vnode_put(dvp)
6346                  * since it may need to release the fs_nodelock on the dvp
6347                  */
6348                 nameidone(&nd);
6349
6350                 vnode_put(dvp);
6351                 vnode_put(vp);
6352
6353                 if (restart_flag == 0) {
6354                         wakeup_one((caddr_t)vp);
6355                         return (error);
6356                 }
6357                 tsleep(vp, PVFS, "rm AD", 1);
6358
6359         } while (restart_flag != 0);
6360
6361         return (error);
6362
6363 }
6364
6365 /* Get direntry length padded to 8 byte alignment */
6366 #define DIRENT64_LEN(namlen) \
6367         ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
6368
6369 static errno_t
6370 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
6371                 int *numdirent, vfs_context_t ctxp)
6372 {
6373         /* Check if fs natively supports VNODE_READDIR_EXTENDED */
6374         if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) {
6375                 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
6376         } else {
6377                 size_t bufsize;
6378                 void * bufptr;
6379                 uio_t auio;
6380                 struct direntry entry64;
6381                 struct dirent *dep;
6382                 int bytesread;
6383                 int error;
6384
6385                 /*
6386                  * Our kernel buffer needs to be smaller since re-packing
6387                  * will expand each dirent.  The worse case (when the name
6388                  * length is 3) corresponds to a struct direntry size of 32
6389                  * bytes (8-byte aligned) and a struct dirent size of 12 bytes
6390                  * (4-byte aligned).  So having a buffer that is 3/8 the size
6391                  * will prevent us from reading more than we can pack.
6392                  *
6393                  * Since this buffer is wired memory, we will limit the
6394                  * buffer size to a maximum of 32K. We would really like to
6395                  * use 32K in the MIN(), but we use magic number 87371 to
6396                  * prevent uio_resid() * 3 / 8 from overflowing.
6397                  */
6398                 bufsize = 3 * MIN(uio_resid(uio), 87371) / 8;
6399                 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
6400                 if (bufptr == NULL) {
6401                         return ENOMEM;
6402                 }
6403
6404                 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
6405                 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
6406                 auio->uio_offset = uio->uio_offset;
6407
6408                 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
6409
6410                 dep = (struct dirent *)bufptr;
6411                 bytesread = bufsize - uio_resid(auio);
6412
6413                 /*
6414                  * Convert all the entries and copy them out to user's buffer.
6415                  */
6416                 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
6417                         /* Convert a dirent to a dirent64. */
6418                         entry64.d_ino = dep->d_ino;
6419                         entry64.d_seekoff = 0;
6420                         entry64.d_reclen = DIRENT64_LEN(dep->d_namlen);
6421                         entry64.d_namlen = dep->d_namlen;
6422                         entry64.d_type = dep->d_type;
6423                         bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1);
6424
6425                         /* Move to next entry. */
6426                         dep = (struct dirent *)((char *)dep + dep->d_reclen);
6427
6428                         /* Copy entry64 to user's buffer. */
6429                         error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio);
6430                 }
6431
6432                 /* Update the real offset using the offset we got from VNOP_READDIR. */
6433                 if (error == 0) {
6434                         uio->uio_offset = auio->uio_offset;
6435                 }
6436                 uio_free(auio);
6437                 FREE(bufptr, M_TEMP);
6438                 return (error);
6439         }
6440 }
6441
6442 /*
6443  * Read a block of directory entries in a file system independent format.
6444  */
6445 static int
6446 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
6447                      off_t *offset, int flags)
6448 {
6449         vnode_t vp;
6450         struct vfs_context context = *vfs_context_current();    /* local copy */
6451         struct fileproc *fp;
6452         uio_t auio;
6453         int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6454         off_t loff;
6455         int error, eofflag, numdirent;
6456         char uio_buf[ UIO_SIZEOF(1) ];
6457
6458         error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
6459         if (error) {
6460                 return (error);
6461         }
6462         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6463                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6464                 error = EBADF;
6465                 goto out;
6466         }
6467
6468 #if CONFIG_MACF
6469         error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
6470         if (error)
6471                 goto out;
6472 #endif
6473         if ( (error = vnode_getwithref(vp)) ) {
6474                 goto out;
6475         }
6476         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6477
6478 unionread:
6479         if (vp->v_type != VDIR) {
6480                 (void)vnode_put(vp);
6481                 error = EINVAL;
6482                 goto out;
6483         }
6484
6485 #if CONFIG_MACF
6486         error = mac_vnode_check_readdir(&context, vp);
6487         if (error != 0) {
6488                 (void)vnode_put(vp);
6489                 goto out;
6490         }
6491 #endif /* MAC */
6492
6493         loff = fp->f_fglob->fg_offset;
6494         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
6495         uio_addiov(auio, bufp, bufsize);
6496
6497         if (flags & VNODE_READDIR_EXTENDED) {
6498                 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
6499                 fp->f_fglob->fg_offset = uio_offset(auio);
6500         } else {
6501                 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
6502                 fp->f_fglob->fg_offset = uio_offset(auio);
6503         }
6504         if (error) {
6505                 (void)vnode_put(vp);
6506                 goto out;
6507         }
6508
6509         if ((user_ssize_t)bufsize == uio_resid(auio)){
6510                 if (union_dircheckp) {
6511                         error = union_dircheckp(&vp, fp, &context);
6512                         if (error == -1)
6513                                 goto unionread;
6514                         if (error)
6515                                 goto out;
6516                 }
6517
6518                 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) {
6519                         struct vnode *tvp = vp;
6520                         vp = vp->v_mount->mnt_vnodecovered;
6521                         vnode_getwithref(vp);
6522                         vnode_ref(vp);
6523                         fp->f_fglob->fg_data = (caddr_t) vp;
6524                         fp->f_fglob->fg_offset = 0;
6525                         vnode_rele(tvp);
6526                         vnode_put(tvp);
6527                         goto unionread;
6528                 }
6529         }
6530
6531         vnode_put(vp);
6532         if (offset) {
6533                 *offset = loff;
6534         }
6535
6536         *bytesread = bufsize - uio_resid(auio);
6537 out:
6538         file_drop(fd);
6539         return (error);
6540 }
6541
6542
6543 int
6544 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
6545 {
6546         off_t offset;
6547         ssize_t bytesread;
6548         int error;
6549
6550         AUDIT_ARG(fd, uap->fd);
6551         error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
6552
6553         if (error == 0) {
6554                 if (proc_is64bit(p)) {
6555                         user64_long_t base = (user64_long_t)offset;
6556                         error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
6557                 } else {
6558                         user32_long_t base = (user32_long_t)offset;
6559                         error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
6560                 }
6561                 *retval = bytesread;
6562         }
6563         return (error);
6564 }
6565
6566 int
6567 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
6568 {
6569         off_t offset;
6570         ssize_t bytesread;
6571         int error;
6572
6573         AUDIT_ARG(fd, uap->fd);
6574         error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
6575
6576         if (error == 0) {
6577                 *retval = bytesread;
6578                 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
6579         }
6580         return (error);
6581 }
6582
6583
6584 /*
6585  * Set the mode mask for creation of filesystem nodes.
6586  * XXX implement xsecurity
6587  */
6588 #define UMASK_NOXSECURITY        (void *)1      /* leave existing xsecurity alone */
6589 static int
6590 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
6591 {
6592         struct filedesc *fdp;
6593
6594         AUDIT_ARG(mask, newmask);
6595         proc_fdlock(p);
6596         fdp = p->p_fd;
6597         *retval = fdp->fd_cmask;
6598         fdp->fd_cmask = newmask & ALLPERMS;
6599         proc_fdunlock(p);
6600         return (0);
6601 }
6602
6603 /*
6604  * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
6605  *
6606  * Parameters:    p                       Process requesting to set the umask
6607  *                uap                     User argument descriptor (see below)
6608  *                retval                  umask of the process (parameter p)
6609  *
6610  * Indirect:      uap->newmask            umask to set
6611  *                uap->xsecurity          ACL to set
6612  *
6613  * Returns:        0                      Success
6614  *                !0                      Not success
6615  *
6616  */
6617 int
6618 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
6619 {
6620         int ciferror;
6621         kauth_filesec_t xsecdst;
6622
6623         xsecdst = KAUTH_FILESEC_NONE;
6624         if (uap->xsecurity != USER_ADDR_NULL) {
6625                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6626                         return ciferror;
6627         } else {
6628                 xsecdst = KAUTH_FILESEC_NONE;
6629         }
6630
6631         ciferror = umask1(p, uap->newmask, xsecdst, retval);
6632
6633         if (xsecdst != KAUTH_FILESEC_NONE)
6634                 kauth_filesec_free(xsecdst);
6635         return ciferror;
6636 }
6637
6638 int
6639 umask(proc_t p, struct umask_args *uap, int32_t *retval)
6640 {
6641         return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
6642 }
6643
6644 /*
6645  * Void all references to file by ripping underlying filesystem
6646  * away from vnode.
6647  */
6648 /* ARGSUSED */
6649 int
6650 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
6651 {
6652         vnode_t vp;
6653         struct vnode_attr va;
6654         vfs_context_t ctx = vfs_context_current();
6655         int error;
6656         struct nameidata nd;
6657
6658         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
6659                 UIO_USERSPACE, uap->path, ctx);
6660         error = namei(&nd);
6661         if (error)
6662                 return (error);
6663         vp = nd.ni_vp;
6664
6665         nameidone(&nd);
6666
6667         if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
6668                 error = ENOTSUP;
6669                 goto out;
6670         }
6671
6672         if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
6673                 error = EBUSY;
6674                 goto out;
6675         }
6676
6677 #if CONFIG_MACF
6678         error = mac_vnode_check_revoke(ctx, vp);
6679         if (error)
6680                 goto out;
6681 #endif
6682
6683         VATTR_INIT(&va);
6684         VATTR_WANTED(&va, va_uid);
6685         if ((error = vnode_getattr(vp, &va, ctx)))
6686                 goto out;
6687         if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
6688             (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
6689                 goto out;
6690         if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
6691                 VNOP_REVOKE(vp, REVOKEALL, ctx);
6692 out:
6693         vnode_put(vp);
6694         return (error);
6695 }
6696
6697
6698 /*
6699  *  HFS/HFS PLUS SPECIFIC SYSTEM CALLS
6700  *  The following system calls are designed to support features
6701  *  which are specific to the HFS & HFS Plus volume formats
6702  */
6703
6704 #ifdef __APPLE_API_OBSOLETE
6705
6706 /************************************************/
6707 /* *** Following calls will be deleted soon *** */
6708 /************************************************/
6709
6710 /*
6711  * Make a complex file.  A complex file is one with multiple forks (data streams)
6712  */
6713 /* ARGSUSED */
6714 int
6715 mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused int32_t *retval)
6716 {
6717         return (ENOTSUP);
6718 }
6719
6720 /*
6721  * Extended stat call which returns volumeid and vnodeid as well as other info
6722  */
6723 /* ARGSUSED */
6724 int
6725 statv(__unused proc_t p,
6726           __unused struct statv_args *uap,
6727           __unused int32_t *retval)
6728 {
6729         return (ENOTSUP);       /*  We'll just return an error for now */
6730
6731 } /* end of statv system call */
6732
6733 /*
6734 * Extended lstat call which returns volumeid and vnodeid as well as other info
6735 */
6736 /* ARGSUSED */
6737 int
6738 lstatv(__unused proc_t p,
6739            __unused struct lstatv_args *uap,
6740            __unused int32_t *retval)
6741 {
6742        return (ENOTSUP);        /*  We'll just return an error for now */
6743 } /* end of lstatv system call */
6744
6745 /*
6746 * Extended fstat call which returns volumeid and vnodeid as well as other info
6747 */
6748 /* ARGSUSED */
6749 int
6750 fstatv(__unused proc_t p,
6751            __unused struct fstatv_args *uap,
6752            __unused int32_t *retval)
6753 {
6754        return (ENOTSUP);        /*  We'll just return an error for now */
6755 } /* end of fstatv system call */
6756
6757
6758 /************************************************/
6759 /* *** Preceding calls will be deleted soon *** */
6760 /************************************************/
6761
6762 #endif /* __APPLE_API_OBSOLETE */
6763
6764 /*
6765 * Obtain attribute information on objects in a directory while enumerating
6766 * the directory.  This call does not yet support union mounted directories.
6767 * TO DO
6768 *  1.union mounted directories.
6769 */
6770
6771 /* ARGSUSED */
6772 int
6773 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
6774 {
6775         vnode_t vp;
6776         struct fileproc *fp;
6777         uio_t auio = NULL;
6778         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6779         uint32_t count;
6780         uint32_t newstate;
6781         int error, eofflag;
6782         uint32_t loff;
6783         struct attrlist attributelist;
6784         vfs_context_t ctx = vfs_context_current();
6785         int fd = uap->fd;
6786         char uio_buf[ UIO_SIZEOF(1) ];
6787         kauth_action_t action;
6788
6789         AUDIT_ARG(fd, fd);
6790
6791         /* Get the attributes into kernel space */
6792         if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
6793                 return(error);
6794         }
6795         if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
6796                 return(error);
6797         }
6798         if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
6799                 return (error);
6800         }
6801         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6802                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6803                 error = EBADF;
6804                 goto out;
6805         }
6806
6807
6808 #if CONFIG_MACF
6809         error = mac_file_check_change_offset(vfs_context_ucred(ctx),
6810             fp->f_fglob);
6811         if (error)
6812                 goto out;
6813 #endif
6814
6815
6816         if ( (error = vnode_getwithref(vp)) )
6817                 goto out;
6818
6819         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6820
6821         if (vp->v_type != VDIR) {
6822                 (void)vnode_put(vp);
6823                 error = EINVAL;
6824                 goto out;
6825         }
6826
6827 #if CONFIG_MACF
6828         error = mac_vnode_check_readdir(ctx, vp);
6829         if (error != 0) {
6830                 (void)vnode_put(vp);
6831                 goto out;
6832         }
6833 #endif /* MAC */
6834
6835         /* set up the uio structure which will contain the users return buffer */
6836         loff = fp->f_fglob->fg_offset;
6837         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ,
6838             &uio_buf[0], sizeof(uio_buf));
6839         uio_addiov(auio, uap->buffer, uap->buffersize);
6840
6841         /*
6842          * If the only item requested is file names, we can let that past with
6843          * just LIST_DIRECTORY.  If they want any other attributes, that means
6844          * they need SEARCH as well.
6845          */
6846         action = KAUTH_VNODE_LIST_DIRECTORY;
6847         if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
6848             attributelist.fileattr || attributelist.dirattr)
6849                 action |= KAUTH_VNODE_SEARCH;
6850
6851         if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
6852
6853                 /* Believe it or not, uap->options only has 32-bits of valid
6854                  * info, so truncate before extending again */
6855                 error = VNOP_READDIRATTR(vp, &attributelist, auio,
6856                                          count,
6857                                          (u_long)(uint32_t)uap->options, &newstate, &eofflag,
6858                                          &count, ctx);
6859         }
6860         (void)vnode_put(vp);
6861
6862         if (error)
6863                 goto out;
6864         fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
6865
6866         if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
6867                 goto out;
6868         if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
6869                 goto out;
6870         if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
6871                 goto out;
6872
6873         *retval = eofflag;  /* similar to getdirentries */
6874         error = 0;
6875 out:
6876         file_drop(fd);
6877         return (error); /* return error earlier, an retval of 0 or 1 now */
6878
6879 } /* end of getdirentryattr system call */
6880
6881 /*
6882 * Exchange data between two files
6883 */
6884
6885 /* ARGSUSED */
6886 int
6887 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
6888 {
6889
6890         struct nameidata fnd, snd;
6891         vfs_context_t ctx = vfs_context_current();
6892         vnode_t fvp;
6893         vnode_t svp;
6894         int error;
6895         u_int32_t nameiflags;
6896         char *fpath = NULL;
6897         char *spath = NULL;
6898         int   flen=0, slen=0;
6899         int from_truncated=0, to_truncated=0;
6900 #if CONFIG_FSE
6901         fse_info f_finfo, s_finfo;
6902 #endif
6903
6904         nameiflags = 0;
6905         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6906
6907     NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1,
6908                 UIO_USERSPACE, uap->path1, ctx);
6909
6910     error = namei(&fnd);
6911     if (error)
6912         goto out2;
6913
6914         nameidone(&fnd);
6915         fvp = fnd.ni_vp;
6916
6917     NDINIT(&snd, LOOKUP | CN_NBMOUNTLOOK, nameiflags | AUDITVNPATH2,
6918                 UIO_USERSPACE, uap->path2, ctx);
6919
6920     error = namei(&snd);
6921     if (error) {
6922                 vnode_put(fvp);
6923                 goto out2;
6924     }
6925         nameidone(&snd);
6926         svp = snd.ni_vp;
6927
6928         /*
6929          * if the files are the same, return an inval error
6930          */
6931         if (svp == fvp) {
6932                 error = EINVAL;
6933                 goto out;
6934         }
6935
6936         /*
6937          * if the files are on different volumes, return an error
6938          */
6939         if (svp->v_mount != fvp->v_mount) {
6940                 error = EXDEV;
6941                 goto out;
6942         }
6943
6944 #if CONFIG_MACF
6945         error = mac_vnode_check_exchangedata(ctx,
6946             fvp, svp);
6947         if (error)
6948                 goto out;
6949 #endif
6950         if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
6951             ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
6952                 goto out;
6953
6954         if (
6955 #if CONFIG_FSE
6956         need_fsevent(FSE_EXCHANGE, fvp) ||
6957 #endif
6958         kauth_authorize_fileop_has_listeners()) {
6959                 GET_PATH(fpath);
6960                 GET_PATH(spath);
6961                 if (fpath == NULL || spath == NULL) {
6962                         error = ENOMEM;
6963                         goto out;
6964                 }
6965
6966                 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
6967                 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
6968
6969 #if CONFIG_FSE
6970                 get_fse_info(fvp, &f_finfo, ctx);
6971                 get_fse_info(svp, &s_finfo, ctx);
6972                 if (from_truncated || to_truncated) {
6973                         // set it here since only the f_finfo gets reported up to user space
6974                         f_finfo.mode |= FSE_TRUNCATED_PATH;
6975                 }
6976 #endif
6977         }
6978         /* Ok, make the call */
6979         error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
6980
6981         if (error == 0) {
6982             const char *tmpname;
6983
6984             if (fpath != NULL && spath != NULL) {
6985                     /* call out to allow 3rd party notification of exchangedata.
6986                      * Ignore result of kauth_authorize_fileop call.
6987                      */
6988                     kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
6989                                            (uintptr_t)fpath, (uintptr_t)spath);
6990             }
6991             name_cache_lock();
6992
6993             tmpname     = fvp->v_name;
6994             fvp->v_name = svp->v_name;
6995             svp->v_name = tmpname;
6996
6997             if (fvp->v_parent != svp->v_parent) {
6998                 vnode_t tmp;
6999
7000                 tmp           = fvp->v_parent;
7001                 fvp->v_parent = svp->v_parent;
7002                 svp->v_parent = tmp;
7003             }
7004             name_cache_unlock();
7005
7006 #if CONFIG_FSE
7007             if (fpath != NULL && spath != NULL) {
7008                     add_fsevent(FSE_EXCHANGE, ctx,
7009                                 FSE_ARG_STRING, flen, fpath,
7010                                 FSE_ARG_FINFO, &f_finfo,
7011                                 FSE_ARG_STRING, slen, spath,
7012                                 FSE_ARG_FINFO, &s_finfo,
7013                                 FSE_ARG_DONE);
7014             }
7015 #endif
7016         }
7017
7018 out:
7019         if (fpath != NULL)
7020                 RELEASE_PATH(fpath);
7021         if (spath != NULL)
7022                 RELEASE_PATH(spath);
7023         vnode_put(svp);
7024         vnode_put(fvp);
7025 out2:
7026         return (error);
7027 }
7028
7029
7030 /* ARGSUSED */
7031
7032 int
7033 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
7034 {
7035         vnode_t vp;
7036         int error=0;
7037         int fserror = 0;
7038         struct nameidata nd;
7039         struct user64_fssearchblock searchblock;
7040         struct searchstate *state;
7041         struct attrlist *returnattrs;
7042         struct timeval timelimit;
7043         void *searchparams1,*searchparams2;
7044         uio_t auio = NULL;
7045         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7046         uint32_t nummatches;
7047         int mallocsize;
7048         uint32_t nameiflags;
7049         vfs_context_t ctx = vfs_context_current();
7050         char uio_buf[ UIO_SIZEOF(1) ];
7051
7052         /* Start by copying in fsearchblock paramater list */
7053     if (IS_64BIT_PROCESS(p)) {
7054         error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
7055         timelimit.tv_sec = searchblock.timelimit.tv_sec;
7056         timelimit.tv_usec = searchblock.timelimit.tv_usec;
7057     }
7058     else {
7059         struct user32_fssearchblock tmp_searchblock;
7060
7061         error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
7062         // munge into 64-bit version
7063         searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
7064         searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
7065         searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
7066         searchblock.maxmatches = tmp_searchblock.maxmatches;
7067                 /*
7068                  * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
7069                  * from a 32 bit long, and tv_usec is already a signed 32 bit int.
7070                  */
7071         timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
7072         timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
7073         searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
7074         searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
7075         searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
7076         searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
7077         searchblock.searchattrs = tmp_searchblock.searchattrs;
7078     }
7079         if (error)
7080                 return(error);
7081
7082         /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
7083          */
7084         if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
7085                 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
7086                 return(EINVAL);
7087
7088         /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
7089         /* It all has to do into local memory and it's not that big so we might as well  put it all together. */
7090         /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
7091         /* block.                                                                                             */
7092
7093         mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
7094                       sizeof(struct attrlist) + sizeof(struct searchstate);
7095
7096         MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
7097
7098         /* Now set up the various pointers to the correct place in our newly allocated memory */
7099
7100         searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
7101         returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
7102         state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
7103
7104         /* Now copy in the stuff given our local variables. */
7105
7106         if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
7107                 goto freeandexit;
7108
7109         if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
7110                 goto freeandexit;
7111
7112         if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
7113                 goto freeandexit;
7114
7115         if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
7116                 goto freeandexit;
7117
7118
7119         /*
7120          * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
7121          * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
7122          * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
7123          * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
7124          * validate the user-supplied data offset of the attrreference_t, we'll do it here.
7125          */
7126
7127         if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
7128                 attrreference_t* string_ref;
7129                 u_int32_t* start_length;
7130                 user64_size_t param_length;
7131
7132                 /* validate searchparams1 */
7133                 param_length = searchblock.sizeofsearchparams1;
7134                 /* skip the word that specifies length of the buffer */
7135                 start_length= (u_int32_t*) searchparams1;
7136                 start_length= start_length+1;
7137                 string_ref= (attrreference_t*) start_length;
7138
7139                 /* ensure no negative offsets or too big offsets */
7140                 if (string_ref->attr_dataoffset < 0 ) {
7141                         error = EINVAL;
7142                         goto freeandexit;
7143                 }
7144                 if (string_ref->attr_length > MAXPATHLEN) {
7145                         error = EINVAL;
7146                         goto freeandexit;
7147                 }
7148
7149                 /* Check for pointer overflow in the string ref */
7150                 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
7151                         error = EINVAL;
7152                         goto freeandexit;
7153                 }
7154
7155                 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
7156                         error = EINVAL;
7157                         goto freeandexit;
7158                 }
7159                 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
7160                         error = EINVAL;
7161                         goto freeandexit;
7162                 }
7163         }
7164
7165         /* set up the uio structure which will contain the users return buffer */
7166         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
7167                                                                   &uio_buf[0], sizeof(uio_buf));
7168     uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
7169
7170         nameiflags = 0;
7171         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7172         NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1,
7173                 UIO_USERSPACE, uap->path, ctx);
7174
7175         error = namei(&nd);
7176         if (error)
7177                 goto freeandexit;
7178
7179         nameidone(&nd);
7180         vp = nd.ni_vp;
7181
7182
7183         /*
7184          * If searchblock.maxmatches == 0, then skip the search. This has happened
7185          * before and sometimes the underlyning code doesnt deal with it well.
7186          */
7187          if (searchblock.maxmatches == 0) {
7188                 nummatches = 0;
7189                 goto saveandexit;
7190          }
7191
7192         /*
7193            Allright, we have everything we need, so lets make that call.
7194
7195            We keep special track of the return value from the file system:
7196            EAGAIN is an acceptable error condition that shouldn't keep us
7197            from copying out any results...
7198          */
7199
7200         fserror = VNOP_SEARCHFS(vp,
7201                                                         searchparams1,
7202                                                         searchparams2,
7203                                                         &searchblock.searchattrs,
7204                                                         (u_long)searchblock.maxmatches,
7205                                                         &timelimit,
7206                                                         returnattrs,
7207                                                         &nummatches,
7208                                                         (u_long)uap->scriptcode,
7209                                                         (u_long)uap->options,
7210                                                         auio,
7211                                                         state,
7212                                                         ctx);
7213
7214 saveandexit:
7215
7216         vnode_put(vp);
7217
7218         /* Now copy out the stuff that needs copying out. That means the number of matches, the
7219            search state.  Everything was already put into he return buffer by the vop call. */
7220
7221         if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
7222                 goto freeandexit;
7223
7224     if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
7225                 goto freeandexit;
7226
7227         error = fserror;
7228
7229 freeandexit:
7230
7231         FREE(searchparams1,M_TEMP);
7232
7233         return(error);
7234
7235
7236 } /* end of searchfs system call */
7237
7238
7239 /*
7240  * Make a filesystem-specific control call:
7241  */
7242 /* ARGSUSED */
7243 static int
7244 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
7245 {
7246         int error=0;
7247         boolean_t is64bit;
7248         u_int size;
7249 #define STK_PARAMS 128
7250         char stkbuf[STK_PARAMS];
7251         caddr_t data, memp;
7252         vnode_t vp = *arg_vp;
7253
7254         size = IOCPARM_LEN(cmd);
7255         if (size > IOCPARM_MAX) return (EINVAL);
7256
7257     is64bit = proc_is64bit(p);
7258
7259         memp = NULL;
7260         if (size > sizeof (stkbuf)) {
7261                 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
7262                 data = memp;
7263         } else {
7264                 data = &stkbuf[0];
7265         };
7266
7267         if (cmd & IOC_IN) {
7268                 if (size) {
7269                         error = copyin(udata, data, size);
7270                         if (error) goto FSCtl_Exit;
7271                 } else {
7272                     if (is64bit) {
7273                         *(user_addr_t *)data = udata;
7274                     }
7275                     else {
7276                         *(uint32_t *)data = (uint32_t)udata;
7277                     }
7278                 };
7279         } else if ((cmd & IOC_OUT) && size) {
7280                 /*
7281                  * Zero the buffer so the user always
7282                  * gets back something deterministic.
7283                  */
7284                 bzero(data, size);
7285         } else if (cmd & IOC_VOID) {
7286                 if (is64bit) {
7287                     *(user_addr_t *)data = udata;
7288                 }
7289                 else {
7290                     *(uint32_t *)data = (uint32_t)udata;
7291                 }
7292         }
7293
7294         /* Check to see if it's a generic command */
7295         if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) {
7296                 mount_t mp = vp->v_mount;
7297                 int arg = *(uint32_t*)data;
7298
7299                 /* record vid of vp so we can drop it below. */
7300                 uint32_t vvid = vp->v_id;
7301
7302                 /*
7303                  * Then grab mount_iterref so that we can release the vnode.
7304                  * Without this, a thread may call vnode_iterate_prepare then
7305                  * get into a deadlock because we've never released the root vp
7306                  */
7307                 error = mount_iterref (mp, 0);
7308                 if (error)  {
7309                         goto FSCtl_Exit;
7310                 }
7311                 vnode_put(vp);
7312
7313                 /* issue the sync for this volume */
7314                 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
7315
7316                 /*
7317                  * Then release the mount_iterref once we're done syncing; it's not
7318                  * needed for the VNOP_IOCTL below
7319                  */
7320                 mount_iterdrop(mp);
7321
7322                 if (arg & FSCTL_SYNC_FULLSYNC) {
7323                         /* re-obtain vnode iocount on the root vp, if possible */
7324                         error = vnode_getwithvid (vp, vvid);
7325                         if (error == 0) {
7326                                 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
7327                                 vnode_put (vp);
7328                         }
7329                 }
7330                 /* mark the argument VP as having been released */
7331                 *arg_vp = NULL;
7332
7333         } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
7334             user_addr_t ext_strings;
7335             uint32_t    num_entries;
7336             uint32_t    max_width;
7337
7338             if (   (is64bit && size != sizeof(user64_package_ext_info))
7339                 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
7340
7341                 // either you're 64-bit and passed a 64-bit struct or
7342                 // you're 32-bit and passed a 32-bit struct.  otherwise
7343                 // it's not ok.
7344                 error = EINVAL;
7345                 goto FSCtl_Exit;
7346             }
7347
7348             if (is64bit) {
7349                 ext_strings = ((user64_package_ext_info *)data)->strings;
7350                 num_entries = ((user64_package_ext_info *)data)->num_entries;
7351                 max_width   = ((user64_package_ext_info *)data)->max_width;
7352             } else {
7353                 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
7354                 num_entries = ((user32_package_ext_info *)data)->num_entries;
7355                 max_width   = ((user32_package_ext_info *)data)->max_width;
7356             }
7357
7358             error = set_package_extensions_table(ext_strings, num_entries, max_width);
7359
7360         } else if (IOCBASECMD(cmd) == FSCTL_WAIT_FOR_SYNC) {
7361                 error = tsleep((caddr_t)&sync_wait_time, PVFS|PCATCH, "sync-wait", 0);
7362                 if (error == 0) {
7363                         *(uint32_t *)data = (uint32_t)sync_wait_time;
7364                         error = 0;
7365                 } else {
7366                         error *= -1;
7367                 }
7368
7369         } else {
7370                 /* Invoke the filesystem-specific code */
7371                 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
7372         }
7373
7374
7375         /*
7376          * Copy any data to user, size was
7377          * already set and checked above.
7378          */
7379         if (error == 0 && (cmd & IOC_OUT) && size)
7380                 error = copyout(data, udata, size);
7381
7382 FSCtl_Exit:
7383         if (memp) kfree(memp, size);
7384
7385         return error;
7386 }
7387
7388 /* ARGSUSED */
7389 int
7390 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
7391 {
7392         int error;
7393         struct nameidata nd;
7394         u_long nameiflags;
7395         vnode_t vp = NULL;
7396         vfs_context_t ctx = vfs_context_current();
7397
7398         AUDIT_ARG(cmd, uap->cmd);
7399         AUDIT_ARG(value32, uap->options);
7400         /* Get the vnode for the file we are getting info on:  */
7401         nameiflags = 0;
7402         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7403         NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE,
7404             uap->path, ctx);
7405         if ((error = namei(&nd))) goto done;
7406         vp = nd.ni_vp;
7407         nameidone(&nd);
7408
7409 #if CONFIG_MACF
7410         error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
7411         if (error) {
7412                 goto done;
7413         }
7414 #endif
7415
7416         error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
7417
7418 done:
7419         if (vp)
7420                 vnode_put(vp);
7421         return error;
7422 }
7423 /* ARGSUSED */
7424 int
7425 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
7426 {
7427         int error;
7428         vnode_t vp = NULL;
7429         vfs_context_t ctx = vfs_context_current();
7430         int fd = -1;
7431
7432         AUDIT_ARG(fd, uap->fd);
7433         AUDIT_ARG(cmd, uap->cmd);
7434         AUDIT_ARG(value32, uap->options);
7435
7436         /* Get the vnode for the file we are getting info on:  */
7437         if ((error = file_vnode(uap->fd, &vp)))
7438                 goto done;
7439         fd = uap->fd;
7440         if ((error = vnode_getwithref(vp))) {
7441                 goto done;
7442         }
7443
7444 #if CONFIG_MACF
7445         error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
7446         if (error) {
7447                 goto done;
7448         }
7449 #endif
7450
7451         error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
7452
7453 done:
7454         if (fd != -1)
7455                 file_drop(fd);
7456
7457         if (vp)
7458                 vnode_put(vp);
7459         return error;
7460 }
7461 /* end of fsctl system call */
7462
7463 /*
7464  * An in-kernel sync for power management to call.
7465  */
7466 __private_extern__ int
7467 sync_internal(void)
7468 {
7469         int error;
7470
7471         struct sync_args data;
7472
7473         int retval[2];
7474
7475
7476         error = sync(current_proc(), &data, &retval[0]);
7477
7478
7479         return (error);
7480 } /* end of sync_internal call */
7481
7482
7483 /*
7484  *  Retrieve the data of an extended attribute.
7485  */
7486 int
7487 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
7488 {
7489         vnode_t vp;
7490         struct nameidata nd;
7491         char attrname[XATTR_MAXNAMELEN+1];
7492         vfs_context_t ctx = vfs_context_current();
7493         uio_t auio = NULL;
7494         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7495         size_t attrsize = 0;
7496         size_t namelen;
7497         u_int32_t nameiflags;
7498         int error;
7499         char uio_buf[ UIO_SIZEOF(1) ];
7500
7501         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7502                 return (EINVAL);
7503
7504         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7505         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7506         if ((error = namei(&nd))) {
7507                 return (error);
7508         }
7509         vp = nd.ni_vp;
7510         nameidone(&nd);
7511
7512         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7513                 goto out;
7514         }
7515         if (xattr_protected(attrname)) {
7516                 error = EPERM;
7517                 goto out;
7518         }
7519         /*
7520          * the specific check for 0xffffffff is a hack to preserve
7521          * binaray compatibilty in K64 with applications that discovered
7522          * that passing in a buf pointer and a size of -1 resulted in
7523          * just the size of the indicated extended attribute being returned.
7524          * this isn't part of the documented behavior, but because of the
7525          * original implemtation's check for "uap->size > 0", this behavior
7526          * was allowed. In K32 that check turned into a signed comparison
7527          * even though uap->size is unsigned...  in K64, we blow by that
7528          * check because uap->size is unsigned and doesn't get sign smeared
7529          * in the munger for a 32 bit user app.  we also need to add a
7530          * check to limit the maximum size of the buffer being passed in...
7531          * unfortunately, the underlying fileystems seem to just malloc
7532          * the requested size even if the actual extended attribute is tiny.
7533          * because that malloc is for kernel wired memory, we have to put a
7534          * sane limit on it.
7535          *
7536          * U32 running on K64 will yield 0x00000000ffffffff for uap->size
7537          * U64 running on K64 will yield -1 (64 bits wide)
7538          * U32/U64 running on K32 will yield -1 (32 bits wide)
7539          */
7540         if (uap->size == 0xffffffff || uap->size == (size_t)-1)
7541                 goto no_uio;
7542
7543         if (uap->size > (size_t)XATTR_MAXSIZE)
7544                 uap->size = XATTR_MAXSIZE;
7545
7546         if (uap->value) {
7547                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
7548                                             &uio_buf[0], sizeof(uio_buf));
7549                 uio_addiov(auio, uap->value, uap->size);
7550         }
7551 no_uio:
7552         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
7553 out:
7554         vnode_put(vp);
7555
7556         if (auio) {
7557                 *retval = uap->size - uio_resid(auio);
7558         } else {
7559                 *retval = (user_ssize_t)attrsize;
7560         }
7561
7562         return (error);
7563 }
7564
7565 /*
7566  * Retrieve the data of an extended attribute.
7567  */
7568 int
7569 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
7570 {
7571         vnode_t vp;
7572         char attrname[XATTR_MAXNAMELEN+1];
7573         uio_t auio = NULL;
7574         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7575         size_t attrsize = 0;
7576         size_t namelen;
7577         int error;
7578         char uio_buf[ UIO_SIZEOF(1) ];
7579
7580         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7581                 return (EINVAL);
7582
7583         if ( (error = file_vnode(uap->fd, &vp)) ) {
7584                 return (error);
7585         }
7586         if ( (error = vnode_getwithref(vp)) ) {
7587                 file_drop(uap->fd);
7588                 return(error);
7589         }
7590         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7591                 goto out;
7592         }
7593         if (xattr_protected(attrname)) {
7594                 error = EPERM;
7595                 goto out;
7596         }
7597         if (uap->value && uap->size > 0) {
7598                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
7599                                             &uio_buf[0], sizeof(uio_buf));
7600                 uio_addiov(auio, uap->value, uap->size);
7601         }
7602
7603         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
7604 out:
7605         (void)vnode_put(vp);
7606         file_drop(uap->fd);
7607
7608         if (auio) {
7609                 *retval = uap->size - uio_resid(auio);
7610         } else {
7611                 *retval = (user_ssize_t)attrsize;
7612         }
7613         return (error);
7614 }
7615
7616 /*
7617  * Set the data of an extended attribute.
7618  */
7619 int
7620 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
7621 {
7622         vnode_t vp;
7623         struct nameidata nd;
7624         char attrname[XATTR_MAXNAMELEN+1];
7625         vfs_context_t ctx = vfs_context_current();
7626         uio_t auio = NULL;
7627         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7628         size_t namelen;
7629         u_int32_t nameiflags;
7630         int error;
7631         char uio_buf[ UIO_SIZEOF(1) ];
7632
7633         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7634                 return (EINVAL);
7635
7636         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7637                 return (error);
7638         }
7639         if (xattr_protected(attrname))
7640                 return(EPERM);
7641         if (uap->size != 0 && uap->value == 0) {
7642                 return (EINVAL);
7643         }
7644
7645         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7646         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7647         if ((error = namei(&nd))) {
7648                 return (error);
7649         }
7650         vp = nd.ni_vp;
7651         nameidone(&nd);
7652
7653         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
7654                                     &uio_buf[0], sizeof(uio_buf));
7655         uio_addiov(auio, uap->value, uap->size);
7656
7657         error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
7658 #if CONFIG_FSE
7659         if (error == 0) {
7660                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
7661                     FSE_ARG_VNODE, vp,
7662                     FSE_ARG_DONE);
7663         }
7664 #endif
7665         vnode_put(vp);
7666         *retval = 0;
7667         return (error);
7668 }
7669
7670 /*
7671  * Set the data of an extended attribute.
7672  */
7673 int
7674 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
7675 {
7676         vnode_t vp;
7677         char attrname[XATTR_MAXNAMELEN+1];
7678         uio_t auio = NULL;
7679         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7680         size_t namelen;
7681         int error;
7682         char uio_buf[ UIO_SIZEOF(1) ];
7683         vfs_context_t ctx = vfs_context_current();
7684
7685         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7686                 return (EINVAL);
7687
7688         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7689                 return (error);
7690         }
7691         if (xattr_protected(attrname))
7692                 return(EPERM);
7693         if (uap->size != 0 && uap->value == 0) {
7694                 return (EINVAL);
7695         }
7696         if ( (error = file_vnode(uap->fd, &vp)) ) {
7697                 return (error);
7698         }
7699         if ( (error = vnode_getwithref(vp)) ) {
7700                 file_drop(uap->fd);
7701                 return(error);
7702         }
7703         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
7704                                     &uio_buf[0], sizeof(uio_buf));
7705         uio_addiov(auio, uap->value, uap->size);
7706
7707         error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
7708 #if CONFIG_FSE
7709         if (error == 0) {
7710                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
7711                     FSE_ARG_VNODE, vp,
7712                     FSE_ARG_DONE);
7713         }
7714 #endif
7715         vnode_put(vp);
7716         file_drop(uap->fd);
7717         *retval = 0;
7718         return (error);
7719 }
7720
7721 /*
7722  * Remove an extended attribute.
7723  * XXX Code duplication here.
7724  */
7725 int
7726 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
7727 {
7728         vnode_t vp;
7729         struct nameidata nd;
7730         char attrname[XATTR_MAXNAMELEN+1];
7731         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7732         vfs_context_t ctx = vfs_context_current();
7733         size_t namelen;
7734         u_int32_t nameiflags;
7735         int error;
7736
7737         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7738                 return (EINVAL);
7739
7740         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
7741         if (error != 0) {
7742                 return (error);
7743         }
7744         if (xattr_protected(attrname))
7745                 return(EPERM);
7746         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7747         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7748         if ((error = namei(&nd))) {
7749                 return (error);
7750         }
7751         vp = nd.ni_vp;
7752         nameidone(&nd);
7753
7754         error = vn_removexattr(vp, attrname, uap->options, ctx);
7755 #if CONFIG_FSE
7756         if (error == 0) {
7757                 add_fsevent(FSE_XATTR_REMOVED, ctx,
7758                     FSE_ARG_VNODE, vp,
7759                     FSE_ARG_DONE);
7760         }
7761 #endif
7762         vnode_put(vp);
7763         *retval = 0;
7764         return (error);
7765 }
7766
7767 /*
7768  * Remove an extended attribute.
7769  * XXX Code duplication here.
7770  */
7771 int
7772 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
7773 {
7774         vnode_t vp;
7775         char attrname[XATTR_MAXNAMELEN+1];
7776         size_t namelen;
7777         int error;
7778         vfs_context_t ctx = vfs_context_current();
7779
7780         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7781                 return (EINVAL);
7782
7783         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
7784         if (error != 0) {
7785                 return (error);
7786         }
7787         if (xattr_protected(attrname))
7788                 return(EPERM);
7789         if ( (error = file_vnode(uap->fd, &vp)) ) {
7790                 return (error);
7791         }
7792         if ( (error = vnode_getwithref(vp)) ) {
7793                 file_drop(uap->fd);
7794                 return(error);
7795         }
7796
7797         error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
7798 #if CONFIG_FSE
7799         if (error == 0) {
7800                 add_fsevent(FSE_XATTR_REMOVED, ctx,
7801                     FSE_ARG_VNODE, vp,
7802                     FSE_ARG_DONE);
7803         }
7804 #endif
7805         vnode_put(vp);
7806         file_drop(uap->fd);
7807         *retval = 0;
7808         return (error);
7809 }
7810
7811 /*
7812  * Retrieve the list of extended attribute names.
7813  * XXX Code duplication here.
7814  */
7815 int
7816 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
7817 {
7818         vnode_t vp;
7819         struct nameidata nd;
7820         vfs_context_t ctx = vfs_context_current();
7821         uio_t auio = NULL;
7822         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7823         size_t attrsize = 0;
7824         u_int32_t nameiflags;
7825         int error;
7826         char uio_buf[ UIO_SIZEOF(1) ];
7827
7828         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7829                 return (EINVAL);
7830
7831         nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
7832         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7833         if ((error = namei(&nd))) {
7834                 return (error);
7835         }
7836         vp = nd.ni_vp;
7837         nameidone(&nd);
7838         if (uap->namebuf != 0 && uap->bufsize > 0) {
7839                 auio = uio_createwithbuffer(1, 0, spacetype,
7840                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
7841                 uio_addiov(auio, uap->namebuf, uap->bufsize);
7842         }
7843
7844         error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
7845
7846         vnode_put(vp);
7847         if (auio) {
7848                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
7849         } else {
7850                 *retval = (user_ssize_t)attrsize;
7851         }
7852         return (error);
7853 }
7854
7855 /*
7856  * Retrieve the list of extended attribute names.
7857  * XXX Code duplication here.
7858  */
7859 int
7860 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
7861 {
7862         vnode_t vp;
7863         uio_t auio = NULL;
7864         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7865         size_t attrsize = 0;
7866         int error;
7867         char uio_buf[ UIO_SIZEOF(1) ];
7868
7869         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7870                 return (EINVAL);
7871
7872         if ( (error = file_vnode(uap->fd, &vp)) ) {
7873                 return (error);
7874         }
7875         if ( (error = vnode_getwithref(vp)) ) {
7876                 file_drop(uap->fd);
7877                 return(error);
7878         }
7879         if (uap->namebuf != 0 && uap->bufsize > 0) {
7880                 auio = uio_createwithbuffer(1, 0, spacetype,
7881                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
7882                 uio_addiov(auio, uap->namebuf, uap->bufsize);
7883         }
7884
7885         error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
7886
7887         vnode_put(vp);
7888         file_drop(uap->fd);
7889         if (auio) {
7890                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
7891         } else {
7892                 *retval = (user_ssize_t)attrsize;
7893         }
7894         return (error);
7895 }
7896
7897 /*
7898  * Obtain the full pathname of a file system object by id.
7899  *
7900  * This is a private SPI used by the File Manager.
7901  */
7902 __private_extern__
7903 int
7904 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
7905 {
7906         vnode_t vp;
7907         struct mount *mp = NULL;
7908         vfs_context_t ctx = vfs_context_current();
7909         fsid_t fsid;
7910         char *realpath;
7911         int bpflags;
7912         int length;
7913         int error;
7914
7915         if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
7916                 return (error);
7917         }
7918         AUDIT_ARG(value32, fsid.val[0]);
7919         AUDIT_ARG(value64, uap->objid);
7920         /* Restrict output buffer size for now. */
7921         if (uap->bufsize > PAGE_SIZE) {
7922                 return (EINVAL);
7923         }
7924         MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
7925         if (realpath == NULL) {
7926                 return (ENOMEM);
7927         }
7928         /* Find the target mountpoint. */
7929         if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) {
7930                 error = ENOTSUP;  /* unexpected failure */
7931                 goto out;
7932         }
7933         /* Find the target vnode. */
7934         if (uap->objid == 2) {
7935                 error = VFS_ROOT(mp, &vp, ctx);
7936         } else {
7937                 error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx);
7938         }
7939         vfs_unbusy(mp);
7940         if (error) {
7941                 goto out;
7942         }
7943         /* Obtain the absolute path to this vnode. */
7944         bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
7945         error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
7946         vnode_put(vp);
7947         if (error) {
7948                 goto out;
7949         }
7950         AUDIT_ARG(text, realpath);
7951         error = copyout((caddr_t)realpath, uap->buf, length);
7952
7953         *retval = (user_ssize_t)length; /* may be superseded by error */
7954 out:
7955         if (realpath) {
7956                 FREE(realpath, M_TEMP);
7957         }
7958         return (error);
7959 }
7960
7961 /*
7962  * Common routine to handle various flavors of statfs data heading out
7963  *      to user space.
7964  *
7965  * Returns:     0                       Success
7966  *              EFAULT
7967  */
7968 static int
7969 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
7970     user_addr_t bufp, int *sizep, boolean_t is_64_bit,
7971     boolean_t partial_copy)
7972 {
7973         int             error;
7974         int             my_size, copy_size;
7975
7976         if (is_64_bit) {
7977                 struct user64_statfs sfs;
7978                 my_size = copy_size = sizeof(sfs);
7979                 bzero(&sfs, my_size);
7980                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
7981                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
7982                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
7983                 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
7984                 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
7985                 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
7986                 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
7987                 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
7988                 sfs.f_files = (user64_long_t)sfsp->f_files;
7989                 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
7990                 sfs.f_fsid = sfsp->f_fsid;
7991                 sfs.f_owner = sfsp->f_owner;
7992                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
7993                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
7994                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
7995
7996                 if (partial_copy) {
7997                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
7998                 }
7999                 error = copyout((caddr_t)&sfs, bufp, copy_size);
8000         }
8001         else {
8002                 struct user32_statfs sfs;
8003
8004                 my_size = copy_size = sizeof(sfs);
8005                 bzero(&sfs, my_size);
8006
8007                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
8008                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
8009                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
8010
8011                 /*
8012                  * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
8013                  * have to fudge the numbers here in that case.   We inflate the blocksize in order
8014                  * to reflect the filesystem size as best we can.
8015                  */
8016                 if ((sfsp->f_blocks > INT_MAX)
8017                         /* Hack for 4061702 . I think the real fix is for Carbon to
8018                          * look for some volume capability and not depend on hidden
8019                          * semantics agreed between a FS and carbon.
8020                          * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
8021                          * for Carbon to set bNoVolumeSizes volume attribute.
8022                          * Without this the webdavfs files cannot be copied onto
8023                          * disk as they look huge. This change should not affect
8024                          * XSAN as they should not setting these to -1..
8025                          */
8026                          && (sfsp->f_blocks != 0xffffffffffffffffULL)
8027                          && (sfsp->f_bfree != 0xffffffffffffffffULL)
8028                          && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
8029                         int             shift;
8030
8031                         /*
8032                          * Work out how far we have to shift the block count down to make it fit.
8033                          * Note that it's possible to have to shift so far that the resulting
8034                          * blocksize would be unreportably large.  At that point, we will clip
8035                          * any values that don't fit.
8036                          *
8037                          * For safety's sake, we also ensure that f_iosize is never reported as
8038                          * being smaller than f_bsize.
8039                          */
8040                         for (shift = 0; shift < 32; shift++) {
8041                                 if ((sfsp->f_blocks >> shift) <= INT_MAX)
8042                                         break;
8043                                 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
8044                                         break;
8045                         }
8046 #define __SHIFT_OR_CLIP(x, s)   ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
8047                         sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
8048                         sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
8049                         sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
8050 #undef __SHIFT_OR_CLIP
8051                         sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
8052                         sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
8053                 } else {
8054                         /* filesystem is small enough to be reported honestly */
8055                         sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
8056                         sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
8057                         sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
8058                         sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
8059                         sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
8060                 }
8061                 sfs.f_files = (user32_long_t)sfsp->f_files;
8062                 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
8063                 sfs.f_fsid = sfsp->f_fsid;
8064                 sfs.f_owner = sfsp->f_owner;
8065                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
8066                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
8067                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
8068
8069                 if (partial_copy) {
8070                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
8071                 }
8072                 error = copyout((caddr_t)&sfs, bufp, copy_size);
8073         }
8074
8075         if (sizep != NULL) {
8076                 *sizep = my_size;
8077         }
8078         return(error);
8079 }
8080
8081 /*
8082  * copy stat structure into user_stat structure.
8083  */
8084 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
8085 {
8086         bzero(usbp, sizeof(*usbp));
8087
8088         usbp->st_dev = sbp->st_dev;
8089         usbp->st_ino = sbp->st_ino;
8090         usbp->st_mode = sbp->st_mode;
8091         usbp->st_nlink = sbp->st_nlink;
8092         usbp->st_uid = sbp->st_uid;
8093         usbp->st_gid = sbp->st_gid;
8094         usbp->st_rdev = sbp->st_rdev;
8095 #ifndef _POSIX_C_SOURCE
8096         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
8097         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
8098         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
8099         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
8100         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
8101         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
8102 #else
8103         usbp->st_atime = sbp->st_atime;
8104         usbp->st_atimensec = sbp->st_atimensec;
8105         usbp->st_mtime = sbp->st_mtime;
8106         usbp->st_mtimensec = sbp->st_mtimensec;
8107         usbp->st_ctime = sbp->st_ctime;
8108         usbp->st_ctimensec = sbp->st_ctimensec;
8109 #endif
8110         usbp->st_size = sbp->st_size;
8111         usbp->st_blocks = sbp->st_blocks;
8112         usbp->st_blksize = sbp->st_blksize;
8113         usbp->st_flags = sbp->st_flags;
8114         usbp->st_gen = sbp->st_gen;
8115         usbp->st_lspare = sbp->st_lspare;
8116         usbp->st_qspare[0] = sbp->st_qspare[0];
8117         usbp->st_qspare[1] = sbp->st_qspare[1];
8118 }
8119
8120 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
8121 {
8122         bzero(usbp, sizeof(*usbp));
8123
8124         usbp->st_dev = sbp->st_dev;
8125         usbp->st_ino = sbp->st_ino;
8126         usbp->st_mode = sbp->st_mode;
8127         usbp->st_nlink = sbp->st_nlink;
8128         usbp->st_uid = sbp->st_uid;
8129         usbp->st_gid = sbp->st_gid;
8130         usbp->st_rdev = sbp->st_rdev;
8131 #ifndef _POSIX_C_SOURCE
8132         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
8133         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
8134         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
8135         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
8136         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
8137         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
8138 #else
8139         usbp->st_atime = sbp->st_atime;
8140         usbp->st_atimensec = sbp->st_atimensec;
8141         usbp->st_mtime = sbp->st_mtime;
8142         usbp->st_mtimensec = sbp->st_mtimensec;
8143         usbp->st_ctime = sbp->st_ctime;
8144         usbp->st_ctimensec = sbp->st_ctimensec;
8145 #endif
8146         usbp->st_size = sbp->st_size;
8147         usbp->st_blocks = sbp->st_blocks;
8148         usbp->st_blksize = sbp->st_blksize;
8149         usbp->st_flags = sbp->st_flags;
8150         usbp->st_gen = sbp->st_gen;
8151         usbp->st_lspare = sbp->st_lspare;
8152         usbp->st_qspare[0] = sbp->st_qspare[0];
8153         usbp->st_qspare[1] = sbp->st_qspare[1];
8154 }
8155
8156 /*
8157  * copy stat64 structure into user_stat64 structure.
8158  */
8159 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
8160 {
8161         bzero(usbp, sizeof(*usbp));
8162
8163         usbp->st_dev = sbp->st_dev;
8164         usbp->st_ino = sbp->st_ino;
8165         usbp->st_mode = sbp->st_mode;
8166         usbp->st_nlink = sbp->st_nlink;
8167         usbp->st_uid = sbp->st_uid;
8168         usbp->st_gid = sbp->st_gid;
8169         usbp->st_rdev = sbp->st_rdev;
8170 #ifndef _POSIX_C_SOURCE
8171         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
8172         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
8173         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
8174         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
8175         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
8176         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
8177         usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
8178         usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
8179 #else
8180         usbp->st_atime = sbp->st_atime;
8181         usbp->st_atimensec = sbp->st_atimensec;
8182         usbp->st_mtime = sbp->st_mtime;
8183         usbp->st_mtimensec = sbp->st_mtimensec;
8184         usbp->st_ctime = sbp->st_ctime;
8185         usbp->st_ctimensec = sbp->st_ctimensec;
8186         usbp->st_birthtime = sbp->st_birthtime;
8187         usbp->st_birthtimensec = sbp->st_birthtimensec;
8188 #endif
8189         usbp->st_size = sbp->st_size;
8190         usbp->st_blocks = sbp->st_blocks;
8191         usbp->st_blksize = sbp->st_blksize;
8192         usbp->st_flags = sbp->st_flags;
8193         usbp->st_gen = sbp->st_gen;
8194         usbp->st_lspare = sbp->st_lspare;
8195         usbp->st_qspare[0] = sbp->st_qspare[0];
8196         usbp->st_qspare[1] = sbp->st_qspare[1];
8197 }
8198
8199 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
8200 {
8201         bzero(usbp, sizeof(*usbp));
8202
8203         usbp->st_dev = sbp->st_dev;
8204         usbp->st_ino = sbp->st_ino;
8205         usbp->st_mode = sbp->st_mode;
8206         usbp->st_nlink = sbp->st_nlink;
8207         usbp->st_uid = sbp->st_uid;
8208         usbp->st_gid = sbp->st_gid;
8209         usbp->st_rdev = sbp->st_rdev;
8210 #ifndef _POSIX_C_SOURCE
8211         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
8212         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
8213         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
8214         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
8215         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
8216         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
8217         usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
8218         usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
8219 #else
8220         usbp->st_atime = sbp->st_atime;
8221         usbp->st_atimensec = sbp->st_atimensec;
8222         usbp->st_mtime = sbp->st_mtime;
8223         usbp->st_mtimensec = sbp->st_mtimensec;
8224         usbp->st_ctime = sbp->st_ctime;
8225         usbp->st_ctimensec = sbp->st_ctimensec;
8226         usbp->st_birthtime = sbp->st_birthtime;
8227         usbp->st_birthtimensec = sbp->st_birthtimensec;
8228 #endif
8229         usbp->st_size = sbp->st_size;
8230         usbp->st_blocks = sbp->st_blocks;
8231         usbp->st_blksize = sbp->st_blksize;
8232         usbp->st_flags = sbp->st_flags;
8233         usbp->st_gen = sbp->st_gen;
8234         usbp->st_lspare = sbp->st_lspare;
8235         usbp->st_qspare[0] = sbp->st_qspare[0];
8236         usbp->st_qspare[1] = sbp->st_qspare[1];
8237 }