bsd/vfs/vfs_syscalls.c

   1 /*
   2  * Copyright (c) 1995-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1989, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  * (c) UNIX System Laboratories, Inc.
  32  * All or some portions of this file are derived from material licensed
  33  * to the University of California by American Telephone and Telegraph
  34  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  35  * the permission of UNIX System Laboratories, Inc.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. All advertising materials mentioning features or use of this software
  46  *    must display the following acknowledgement:
  47  *      This product includes software developed by the University of
  48  *      California, Berkeley and its contributors.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  *
  65  *      @(#)vfs_syscalls.c      8.41 (Berkeley) 6/15/95
  66  */
  67 /*
  68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  69  * support for mandatory and extensible security protections.  This notice
  70  * is included in support of clause 2.2 (b) of the Apple Public License,
  71  * Version 2.0.
  72  */
  73
  74 #include <sys/param.h>
  75 #include <sys/systm.h>
  76 #include <sys/namei.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/kernel.h>
  79 #include <sys/file_internal.h>
  80 #include <sys/stat.h>
  81 #include <sys/vnode_internal.h>
  82 #include <sys/mount_internal.h>
  83 #include <sys/proc_internal.h>
  84 #include <sys/kauth.h>
  85 #include <sys/uio_internal.h>
  86 #include <sys/malloc.h>
  87 #include <sys/mman.h>
  88 #include <sys/dirent.h>
  89 #include <sys/attr.h>
  90 #include <sys/sysctl.h>
  91 #include <sys/ubc.h>
  92 #include <sys/quota.h>
  93 #include <sys/kdebug.h>
  94 #include <sys/fsevents.h>
  95 #include <sys/sysproto.h>
  96 #include <sys/xattr.h>
  97 #include <sys/ubc_internal.h>
  98 #include <sys/disk.h>
  99 #include <machine/cons.h>
 100 #include <machine/limits.h>
 101 #include <miscfs/specfs/specdev.h>
 102 #include <miscfs/union/union.h>
 103
 104 #include <bsm/audit_kernel.h>
 105 #include <bsm/audit_kevents.h>
 106
 107 #include <mach/mach_types.h>
 108 #include <kern/kern_types.h>
 109 #include <kern/kalloc.h>
 110
 111 #include <vm/vm_pageout.h>
 112
 113 #include <libkern/OSAtomic.h>
 114
 115 #if CONFIG_MACF
 116 #include <security/mac.h>
 117 #include <security/mac_framework.h>
 118 #endif
 119
 120 #if CONFIG_FSE
     /*
      * Path-buffer helpers.  With CONFIG_FSE the buffer is obtained from
      * get_pathbuff() and handed back with release_pathbuff().
      *
      * NOTE: each expansion already ends in ';', so writing "GET_PATH(p);"
      * leaves an extra empty statement behind.  Take care when using these
      * as the unbraced body of an if/else.
      */
 121 #define GET_PATH(x) \
 122         (x) = get_pathbuff();
 123 #define RELEASE_PATH(x) \
 124         release_pathbuff(x);
 125 #else
     /*
      * Without CONFIG_FSE the buffer is a MAXPATHLEN-byte M_NAMEI zone
      * allocation requested with M_WAITOK, and released with FREE_ZONE
      * using the same size and type.
      */
 126 #define GET_PATH(x)     \
 127         MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
 128 #define RELEASE_PATH(x) \
 129         FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
 130 #endif /* CONFIG_FSE */
 131
 132 /*
      * Argument bundle for the checkdirs iteration: a pair of vnodes.
      * NOTE(review): presumably passed to checkdirs_callback() through its
      * void *arg, with olddp being the directory that was covered and newdp
      * the vnode that replaces it -- confirm against checkdirs().
      */
 133 struct cdirargs {
 134         vnode_t olddp;
 135         vnode_t newdp;
 136 };
 137 /* callback  for checkdirs iteration */
 138 static int checkdirs_callback(proc_t p, void * arg);
 139
 140 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
 141 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
 142 void enablequotas(struct mount *mp, vfs_context_t ctx);
 143 static int getfsstat_callback(mount_t mp, void * arg);
 144 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
 145 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
 146 static int sync_callback(mount_t, void *);
 147 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
 148                         user_addr_t bufp, int *sizep, boolean_t is_64_bit,
 149                                                 boolean_t partial_copy);
 150 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp);
 151 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
 152
 153 __private_extern__
 154 int sync_internal(void);
 155
 156 __private_extern__
 157 int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, register_t *);
 158
 159 __private_extern__
 160 int unlink1(vfs_context_t, struct nameidata *, int);
 161
 162
 163 #ifdef __APPLE_API_OBSOLETE
     /*
      * Argument structures and prototypes for the fstatv, lstatv, mkcomplex
      * and statv entry points.  Everything in this section is compiled only
      * when __APPLE_API_OBSOLETE is defined.
      */
 164 struct fstatv_args {
 165        int fd;                  /* file descriptor of the target file */
 166        struct vstat *vsb;       /* vstat structure for returned info  */
 167 };
 168 struct lstatv_args {
 169        const char *path;        /* pathname of the target file       */
 170        struct vstat *vsb;       /* vstat structure for returned info */
 171 };
 172 struct mkcomplex_args {
 173         const char *path;       /* pathname of the file to be created */
 174                 mode_t mode;            /* access mode for the newly created file */
 175         u_long type;            /* format of the complex file */
 176 };
 177 struct statv_args {
 178         const char *path;       /* pathname of the target file       */
 179         struct vstat *vsb;      /* vstat structure for returned info */
 180 };
 181
     /* Entry points taking the argument structures declared above. */
 182 int fstatv(proc_t p, struct fstatv_args *uap, register_t *retval);
 183 int lstatv(proc_t p, struct lstatv_args *uap, register_t *retval);
 184 int mkcomplex(proc_t p, struct mkcomplex_args *uap, register_t *retval);
 185 int statv(proc_t p, struct statv_args *uap, register_t *retval);
 186
 187 #endif /* __APPLE_API_OBSOLETE */
 188
 189 /*
 190  * Incremented each time a mount or unmount operation occurs.
 191  * Used to invalidate the cached value of the rootvp in the
 192  * mount structure utilized by cache_lookup_path.
      *
      * __mac_mount() bumps it between name_cache_lock() and
      * name_cache_unlock() once a new filesystem has been attached to the
      * covered vnode.
 193  */
 194 int mount_generation = 0;
 195
 196 /*
      * Counts the number of mount and unmount operations.
      * The successful-mount path in __mac_mount() increments it with
      * OSAddAtomic() rather than a plain '++'.
      */
 197 unsigned int vfs_nummntops=0;
 198
 199 extern struct fileops vnops;
 200 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
 201
 202
 203 /*
 204  * Virtual File System System Calls
 205  */
 206
 207 /*
 208  * mount system call entry point.
      *
      * Repackages the caller's arguments (type, path, flags, data) into a
      * struct __mac_mount_args with no MAC label (mac_p is USER_ADDR_NULL)
      * and lets __mac_mount() do the actual work.  The value returned is
      * whatever __mac_mount() returns.
 209  */
 210 /* ARGSUSED */
 211 int
 212 mount(proc_t p, struct mount_args *uap, __unused register_t *retval)
 213 {
 214         struct __mac_mount_args mac_args;
 215
 216         /* plain mount: there is no label to pass along */
 217         mac_args.mac_p = USER_ADDR_NULL;
 218         mac_args.data = uap->data;
 219         mac_args.flags = uap->flags;
 220         mac_args.path = uap->path;
 221         mac_args.type = uap->type;
     
         return __mac_mount(p, &mac_args, retval);
 222 }
 223
 224 int
 225 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused register_t *retval)
 226 {
 227         struct vnode *vp;
 228         struct vnode *devvp = NULLVP;
 229         struct vnode *device_vnode = NULLVP;
 230 #if CONFIG_MACF
 231         struct vnode *rvp;
 232 #endif
 233         struct mount *mp;
 234         struct vfstable *vfsp = (struct vfstable *)0;
 235         int error, flag = 0;
 236         struct vnode_attr va;
 237         vfs_context_t ctx = vfs_context_current();
 238         struct nameidata nd;
 239         struct nameidata nd1;
 240         char fstypename[MFSNAMELEN];
 241         size_t dummy=0;
 242         user_addr_t devpath = USER_ADDR_NULL;
 243         user_addr_t fsmountargs =  uap->data;
 244         int ronly = 0;
 245         int mntalloc = 0;
 246         mode_t accessmode;
 247         boolean_t is_64bit;
 248         boolean_t is_rwlock_locked = FALSE;
 249
 250         AUDIT_ARG(fflags, uap->flags);
 251
 252         is_64bit = proc_is64bit(p);
 253
 254         /*
 255          * Get vnode to be covered
 256          */
 257         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
 258                    UIO_USERSPACE, uap->path, ctx);
 259         error = namei(&nd);
 260         if (error)
 261                 return (error);
 262         vp = nd.ni_vp;
 263
 264         if ((vp->v_flag & VROOT) &&
 265                 (vp->v_mount->mnt_flag & MNT_ROOTFS))
 266                         uap->flags |= MNT_UPDATE;
 267
 268         error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
 269         if (error)
 270                 goto out1;
 271
 272         if (uap->flags & MNT_UPDATE) {
 273                 if ((vp->v_flag & VROOT) == 0) {
 274                         error = EINVAL;
 275                         goto out1;
 276                 }
 277                 mp = vp->v_mount;
 278
 279                 /* unmount in progress return error */
 280                 mount_lock(mp);
 281                 if (mp->mnt_lflag & MNT_LUNMOUNT) {
 282                         mount_unlock(mp);
 283                         error = EBUSY;
 284                         goto out1;
 285                 }
 286                 mount_unlock(mp);
 287                 lck_rw_lock_exclusive(&mp->mnt_rwlock);
 288                 is_rwlock_locked = TRUE;
 289                 /*
 290                  * We only allow the filesystem to be reloaded if it
 291                  * is currently mounted read-only.
 292                  */
 293                 if ((uap->flags & MNT_RELOAD) &&
 294                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
 295                         error = ENOTSUP;
 296                         goto out1;
 297                 }
 298                 /*
 299                  * Only root, or the user that did the original mount is
 300                  * permitted to update it.
 301                  */
 302                 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 303                     (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
 304                         goto out1;
 305                 }
 306 #if CONFIG_MACF
 307                 error = mac_mount_check_remount(ctx, mp);
 308                 if (error != 0) {
 309                         lck_rw_done(&mp->mnt_rwlock);
 310                         goto out1;
 311                 }
 312 #endif
 313                 /*
 314                  * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
 315                  * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
 316                  */
 317                 if (suser(vfs_context_ucred(ctx), NULL)) {
 318                         uap->flags |= MNT_NOSUID | MNT_NODEV;
 319                         if (mp->mnt_flag & MNT_NOEXEC)
 320                                 uap->flags |= MNT_NOEXEC;
 321                 }
 322                 flag = mp->mnt_flag;
 323
 324                 mp->mnt_flag |=
 325                     uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
 326
 327                 vfsp = mp->mnt_vtable;
 328                 goto update;
 329         }
 330         /*
 331          * If the user is not root, ensure that they own the directory
 332          * onto which we are attempting to mount.
 333          */
 334         VATTR_INIT(&va);
 335         VATTR_WANTED(&va, va_uid);
 336         if ((error = vnode_getattr(vp, &va, ctx)) ||
 337             (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 338              (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))) {
 339                 goto out1;
 340         }
 341         /*
 342          * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
 343          * MNT_NOEXEC if mount point is already MNT_NOEXEC.
 344          */
 345         if (suser(vfs_context_ucred(ctx), NULL)) {
 346                 uap->flags |= MNT_NOSUID | MNT_NODEV;
 347                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
 348                         uap->flags |= MNT_NOEXEC;
 349         }
 350         if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
 351                 goto out1;
 352
 353         if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
 354                 goto out1;
 355
 356         if (vp->v_type != VDIR) {
 357                 error = ENOTDIR;
 358                 goto out1;
 359         }
 360
 361         /* XXXAUDIT: Should we capture the type on the error path as well? */
 362         AUDIT_ARG(text, fstypename);
 363         mount_list_lock();
 364         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 365                 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN))
 366                         break;
 367         mount_list_unlock();
 368         if (vfsp == NULL) {
 369                 error = ENODEV;
 370                 goto out1;
 371         }
 372 #if CONFIG_MACF
 373         error = mac_mount_check_mount(ctx, vp,
 374             &nd.ni_cnd, vfsp->vfc_name);
 375         if (error != 0)
 376                 goto out1;
 377 #endif
 378         if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
 379                 error = EBUSY;
 380                 goto out1;
 381         }
 382         vnode_lock_spin(vp);
 383         SET(vp->v_flag, VMOUNT);
 384         vnode_unlock(vp);
 385
 386         /*
 387          * Allocate and initialize the filesystem.
 388          */
 389         MALLOC_ZONE(mp, struct mount *, (u_long)sizeof(struct mount),
 390                 M_MOUNT, M_WAITOK);
 391         bzero((char *)mp, (u_long)sizeof(struct mount));
 392         mntalloc = 1;
 393
 394         /* Initialize the default IO constraints */
 395         mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 396         mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 397         mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
 398         mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
 399         mp->mnt_devblocksize = DEV_BSIZE;
 400         mp->mnt_alignmentmask = PAGE_MASK;
 401         mp->mnt_ioflags = 0;
 402         mp->mnt_realrootvp = NULLVP;
 403         mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
 404
 405         TAILQ_INIT(&mp->mnt_vnodelist);
 406         TAILQ_INIT(&mp->mnt_workerqueue);
 407         TAILQ_INIT(&mp->mnt_newvnodes);
 408         mount_lock_init(mp);
 409         lck_rw_lock_exclusive(&mp->mnt_rwlock);
 410         is_rwlock_locked = TRUE;
 411         mp->mnt_op = vfsp->vfc_vfsops;
 412         mp->mnt_vtable = vfsp;
 413         mount_list_lock();
 414         vfsp->vfc_refcount++;
 415         mount_list_unlock();
 416         //mp->mnt_stat.f_type = vfsp->vfc_typenum;
 417         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 418         strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
 419         strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
 420         mp->mnt_vnodecovered = vp;
 421         mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
 422         mp->mnt_devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
 423
 424         /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
 425         vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
 426
 427 update:
 428         /*
 429          * Set the mount level flags.
 430          */
 431         if (uap->flags & MNT_RDONLY)
 432                 mp->mnt_flag |= MNT_RDONLY;
 433         else if (mp->mnt_flag & MNT_RDONLY)
 434                 mp->mnt_kern_flag |= MNTK_WANTRDWR;
 435         mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 436                           MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 437                           MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 438                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
 439         mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 440                                       MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 441                                       MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 442                                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
 443
 444 #if CONFIG_MACF
 445         if (uap->flags & MNT_MULTILABEL) {
 446                 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
 447                         error = EINVAL;
 448                         goto out1;
 449                 }
 450                 mp->mnt_flag |= MNT_MULTILABEL;
 451         }
 452 #endif
 453
 454         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
 455                 if (is_64bit) {
 456                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
 457                                 goto out1;
 458                         fsmountargs += sizeof(devpath);
 459                 } else {
 460                         char *tmp;
 461                         if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
 462                                 goto out1;
 463                         /* munge into LP64 addr */
 464                         devpath = CAST_USER_ADDR_T(tmp);
 465                         fsmountargs += sizeof(tmp);
 466                 }
 467
 468                 /* if it is not update and device name needs to be parsed */
 469                 if ((devpath)) {
 470                         NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
 471                         if ( (error = namei(&nd1)) )
 472                                 goto out1;
 473
 474                         strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN);
 475                         devvp = nd1.ni_vp;
 476
 477                         nameidone(&nd1);
 478
 479                         if (devvp->v_type != VBLK) {
 480                                 error = ENOTBLK;
 481                                 goto out2;
 482                         }
 483                         if (major(devvp->v_rdev) >= nblkdev) {
 484                                 error = ENXIO;
 485                                 goto out2;
 486                         }
 487                         /*
 488                         * If mount by non-root, then verify that user has necessary
 489                         * permissions on the device.
 490                         */
 491                         if (suser(vfs_context_ucred(ctx), NULL) != 0) {
 492                                 accessmode = KAUTH_VNODE_READ_DATA;
 493                                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 494                                         accessmode |= KAUTH_VNODE_WRITE_DATA;
 495                                 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
 496                                         goto out2;
 497                         }
 498                 }
 499                 if (devpath && ((uap->flags & MNT_UPDATE) == 0)) {
 500                         if ( (error = vnode_ref(devvp)) )
 501                                 goto out2;
 502                         /*
 503                         * Disallow multiple mounts of the same device.
 504                         * Disallow mounting of a device that is currently in use
 505                         * (except for root, which might share swap device for miniroot).
 506                         * Flush out any old buffers remaining from a previous use.
 507                         */
 508                         if ( (error = vfs_mountedon(devvp)) )
 509                                 goto out3;
 510
 511                         if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
 512                                 error = EBUSY;
 513                                 goto out3;
 514                         }
 515                         if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
 516                                 error = ENOTBLK;
 517                                 goto out3;
 518                         }
 519                         if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
 520                                 goto out3;
 521
 522                         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 523 #if CONFIG_MACF
 524                         error = mac_vnode_check_open(ctx,
 525                             devvp,
 526                             ronly ? FREAD : FREAD|FWRITE);
 527                         if (error)
 528                                 goto out3;
 529 #endif /* MAC */
 530                         if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
 531                                 goto out3;
 532
 533                         mp->mnt_devvp = devvp;
 534                         device_vnode = devvp;
 535                 } else {
 536                         if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
 537                                 /*
 538                                  * If upgrade to read-write by non-root, then verify
 539                                  * that user has necessary permissions on the device.
 540                                  */
 541                                 device_vnode = mp->mnt_devvp;
 542                                 if (device_vnode && suser(vfs_context_ucred(ctx), NULL)) {
 543                                         if ((error = vnode_authorize(device_vnode, NULL,
 544                                                  KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0)
 545                                                 goto out2;
 546                                 }
 547                         }
 548                         device_vnode = NULLVP;
 549                 }
 550         }
 551 #if CONFIG_MACF
 552         if ((uap->flags & MNT_UPDATE) == 0) {
 553                 mac_mount_label_init(mp);
 554                 mac_mount_label_associate(ctx, mp);
 555         }
 556         if (uap->mac_p != USER_ADDR_NULL) {
 557                 struct user_mac mac;
 558                 char *labelstr = NULL;
 559                 size_t ulen = 0;
 560
 561                 if ((uap->flags & MNT_UPDATE) != 0) {
 562                         error = mac_mount_check_label_update(
 563                             ctx, mp);
 564                         if (error != 0)
 565                                 goto out3;
 566                 }
 567                 if (is_64bit) {
 568                         error = copyin(uap->mac_p, &mac, sizeof(mac));
 569                 } else {
 570                         struct mac mac32;
 571                         error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 572                         mac.m_buflen = mac32.m_buflen;
 573                         mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
 574                 }
 575                 if (error != 0)
 576                         goto out3;
 577                 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
 578                     (mac.m_buflen < 2)) {
 579                         error = EINVAL;
 580                         goto out3;
 581                 }
 582                 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
 583                 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
 584                 if (error != 0) {
 585                         FREE(labelstr, M_MACTEMP);
 586                         goto out3;
 587                 }
 588                 AUDIT_ARG(mac_string, labelstr);
 589                 error = mac_mount_label_internalize(mp->mnt_mntlabel, labelstr);
 590                 FREE(labelstr, M_MACTEMP);
 591                 if (error != 0)
 592                         goto out3;
 593         }
 594 #endif
 595         if (device_vnode != NULL) {
 596                 VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&mp->mnt_devbsdunit, 0, NULL);
 597                 mp->mnt_devbsdunit %= LOWPRI_MAX_NUM_DEV;
 598         }
 599
 600         /*
 601          * Mount the filesystem.
 602          */
 603         error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
 604
 605         if (uap->flags & MNT_UPDATE) {
 606                 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 607                         mp->mnt_flag &= ~MNT_RDONLY;
 608                 mp->mnt_flag &=~
 609                     (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
 610                 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 611                 if (error)
 612                         mp->mnt_flag = flag;
 613                 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
 614                 lck_rw_done(&mp->mnt_rwlock);
 615                 is_rwlock_locked = FALSE;
 616                 if (!error)
 617                         enablequotas(mp, ctx);
 618                 goto out2;
 619         }
 620         /*
 621          * Put the new filesystem on the mount list after root.
 622          */
 623         if (error == 0) {
 624                 struct vfs_attr vfsattr;
 625 #if CONFIG_MACF
 626                 if (vfs_flags(mp) & MNT_MULTILABEL) {
 627                         error = VFS_ROOT(mp, &rvp, ctx);
 628                         if (error) {
 629                                 printf("%s() VFS_ROOT returned %d\n", __func__, error);
 630                                 goto out3;
 631                         }
 632
 633                         /* VFS_ROOT provides reference so needref = 0 */
 634                         error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
 635                         if (error)
 636                                 goto out3;
 637                 }
 638 #endif  /* MAC */
 639
 640                 vnode_lock_spin(vp);
 641                 CLR(vp->v_flag, VMOUNT);
 642                 vp->v_mountedhere = mp;
 643                 vnode_unlock(vp);
 644
 645                 /*
 646                  * taking the name_cache_lock exclusively will
 647                  * insure that everyone is out of the fast path who
 648                  * might be trying to use a now stale copy of
 649                  * vp->v_mountedhere->mnt_realrootvp
 650                  * bumping mount_generation causes the cached values
 651                  * to be invalidated
 652                  */
 653                 name_cache_lock();
 654                 mount_generation++;
 655                 name_cache_unlock();
 656
 657                 vnode_ref(vp);
 658
 659                 error = checkdirs(vp, ctx);
 660                 if (error != 0)  {
 661                         /* Unmount the filesystem as cdir/rdirs cannot be updated */
 662                         goto out4;
 663                 }
 664                 /*
 665                  * there is no cleanup code here so I have made it void
 666                  * we need to revisit this
 667                  */
 668                 (void)VFS_START(mp, 0, ctx);
 669
 670                 mount_list_add(mp);
 671                 lck_rw_done(&mp->mnt_rwlock);
 672                 is_rwlock_locked = FALSE;
 673
 674                 /* Check if this mounted file system supports EAs or named streams. */
 675                 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
 676                 VFSATTR_INIT(&vfsattr);
 677                 VFSATTR_WANTED(&vfsattr, f_capabilities);
 678                 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
 679                     vfs_getattr(mp, &vfsattr, ctx) == 0 &&
 680                     VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
 681                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
 682                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
 683                                 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 684                         }
 685 #if NAMEDSTREAMS
 686                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
 687                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
 688                                 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
 689                         }
 690 #endif
 691                         /* Check if this file system supports path from id lookups. */
 692                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
 693                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
 694                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 695                         } else if (mp->mnt_flag & MNT_DOVOLFS) {
 696                                 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
 697                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 698                         }
 699                 }
 700                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
 701                         mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 702                 }
 703                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
 704                         mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
 705                 }
 706                 /* increment the operations count */
 707                 OSAddAtomic(1, (SInt32 *)&vfs_nummntops);
 708                 enablequotas(mp, ctx);
 709
 710                 if (device_vnode) {
 711                         device_vnode->v_specflags |= SI_MOUNTEDON;
 712
 713                         /*
 714                          *   cache the IO attributes for the underlying physical media...
 715                          *   an error return indicates the underlying driver doesn't
 716                          *   support all the queries necessary... however, reasonable
 717                          *   defaults will have been set, so no reason to bail or care
 718                          */
 719                         vfs_init_io_attributes(device_vnode, mp);
 720                 }
 721
 722                 /* Now that mount is setup, notify the listeners */
 723                 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
 724         } else {
 725                 vnode_lock_spin(vp);
 726                 CLR(vp->v_flag, VMOUNT);
 727                 vnode_unlock(vp);
 728                 mount_list_lock();
 729                 mp->mnt_vtable->vfc_refcount--;
 730                 mount_list_unlock();
 731
 732                 if (device_vnode ) {
 733                         VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
 734                         vnode_rele(device_vnode);
 735                 }
 736                 lck_rw_done(&mp->mnt_rwlock);
 737                 is_rwlock_locked = FALSE;
 738                 mount_lock_destroy(mp);
 739 #if CONFIG_MACF
 740                 mac_mount_label_destroy(mp);
 741 #endif
 742                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 743         }
 744         nameidone(&nd);
 745
 746         /*
 747          * drop I/O count on covered 'vp' and
 748          * on the device vp if there was one
 749          */
 750         if (devpath && devvp)
 751                 vnode_put(devvp);
 752         vnode_put(vp);
 753
 754         return(error);
 755 out4:
 756         (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
 757         if (device_vnode != NULLVP) {
 758                 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
 759                        ctx);
 760
 761         }
 762         vnode_lock_spin(vp);
 763         vp->v_mountedhere = (mount_t) 0;
 764         vnode_unlock(vp);
 765         vnode_rele(vp);
 766 out3:
 767         if (devpath && ((uap->flags & MNT_UPDATE) == 0))
 768                 vnode_rele(devvp);
 769 out2:
 770         if (devpath && devvp)
 771                 vnode_put(devvp);
 772 out1:
 773         /* Release mnt_rwlock only when it was taken */
 774         if (is_rwlock_locked == TRUE) {
 775                 lck_rw_done(&mp->mnt_rwlock);
 776         }
 777         if (mntalloc) {
 778 #if CONFIG_MACF
 779                 mac_mount_label_destroy(mp);
 780 #endif
 781                 mount_list_lock();
 782                 vfsp->vfc_refcount--;
 783                 mount_list_unlock();
 784                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 785         }
 786         vnode_put(vp);
 787         nameidone(&nd);
 788
 789         return(error);
 790 }
 791
 792 void
 793 enablequotas(struct mount *mp, vfs_context_t ctx)
 794 {
 795         struct nameidata qnd;
 796         int type;
 797         char qfpath[MAXPATHLEN];
 798         const char *qfname = QUOTAFILENAME;
 799         const char *qfopsname = QUOTAOPSNAME;
 800         const char *qfextension[] = INITQFNAMES;
 801
 802         /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
 803         if ((strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 )
 804                 && (strncmp( mp->mnt_vfsstat.f_fstypename, "ufs", sizeof("ufs")) != 0))
 805           return;
 806
 807         /*
 808          * Enable filesystem disk quotas if necessary.
 809          * We ignore errors as this should not interfere with final mount
 810          */
 811         for (type=0; type < MAXQUOTAS; type++) {
 812                 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
 813                 NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE32, CAST_USER_ADDR_T(qfpath), ctx);
 814                 if (namei(&qnd) != 0)
 815                         continue;           /* option file to trigger quotas is not present */
 816                 vnode_put(qnd.ni_vp);
 817                 nameidone(&qnd);
 818                 snprintf(qfpath, sizeof(qfpath),  "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
 819
 820                 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
 821         }
 822         return;
 823 }
 824
 825
 826 static int
 827 checkdirs_callback(proc_t p, void * arg)
 828 {
 829         struct cdirargs * cdrp = (struct cdirargs * )arg;
 830         vnode_t olddp = cdrp->olddp;
 831         vnode_t newdp = cdrp->newdp;
 832         struct filedesc *fdp;
 833         vnode_t tvp;
 834         vnode_t fdp_cvp;
 835         vnode_t fdp_rvp;
 836         int cdir_changed = 0;
 837         int rdir_changed = 0;
 838
 839         /*
 840          * XXX Also needs to iterate each thread in the process to see if it
 841          * XXX is using a per-thread current working directory, and, if so,
 842          * XXX update that as well.
 843          */
 844
 845         proc_fdlock(p);
 846         fdp = p->p_fd;
 847         if (fdp == (struct filedesc *)0) {
 848                 proc_fdunlock(p);
 849                 return(PROC_RETURNED);
 850         }
 851         fdp_cvp = fdp->fd_cdir;
 852         fdp_rvp = fdp->fd_rdir;
 853         proc_fdunlock(p);
 854
 855         if (fdp_cvp == olddp) {
 856                 vnode_ref(newdp);
 857                 tvp = fdp->fd_cdir;
 858                 fdp_cvp = newdp;
 859                 cdir_changed = 1;
 860                 vnode_rele(tvp);
 861         }
 862         if (fdp_rvp == olddp) {
 863                 vnode_ref(newdp);
 864                 tvp = fdp->fd_rdir;
 865                 fdp_rvp = newdp;
 866                 rdir_changed = 1;
 867                 vnode_rele(tvp);
 868         }
 869         if (cdir_changed || rdir_changed) {
 870                 proc_fdlock(p);
 871                 fdp->fd_cdir = fdp_cvp;
 872                 fdp->fd_rdir = fdp_rvp;
 873                 proc_fdunlock(p);
 874         }
 875         return(PROC_RETURNED);
 876 }
 877
 878
 879
 880 /*
 881  * Scan all active processes to see if any of them have a current
 882  * or root directory onto which the new filesystem has just been
 883  * mounted. If so, replace them with the new mount point.
 884  */
 885 static int
 886 checkdirs(vnode_t olddp, vfs_context_t ctx)
 887 {
 888         vnode_t newdp;
 889         vnode_t tvp;
 890         int err;
 891         struct cdirargs cdr;
 892         struct uthread * uth = get_bsdthread_info(current_thread());
 893
 894         if (olddp->v_usecount == 1)
 895                 return(0);
 896         if (uth != (struct uthread *)0)
 897                 uth->uu_notrigger = 1;
 898         err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
 899         if (uth != (struct uthread *)0)
 900                 uth->uu_notrigger = 0;
 901
 902         if (err != 0) {
 903 #if DIAGNOSTIC
 904                 panic("mount: lost mount: error %d", err);
 905 #endif
 906                 return(err);
 907         }
 908
 909         cdr.olddp = olddp;
 910         cdr.newdp = newdp;
 911         /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
 912         proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
 913
 914         if (rootvnode == olddp) {
 915                 vnode_ref(newdp);
 916                 tvp = rootvnode;
 917                 rootvnode = newdp;
 918                 vnode_rele(tvp);
 919         }
 920
 921         vnode_put(newdp);
 922         return(0);
 923 }
 924
 925 /*
 926  * Unmount a file system.
 927  *
 928  * Note: unmount takes a path to the vnode mounted on as argument,
 929  * not special file (as before).
 930  */
 931 /* ARGSUSED */
 932 int
 933 unmount(__unused proc_t p, struct unmount_args *uap, __unused register_t *retval)
 934 {
 935         vnode_t vp;
 936         struct mount *mp;
 937         int error;
 938         struct nameidata nd;
 939         vfs_context_t ctx = vfs_context_current();
 940
 941         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
 942                 UIO_USERSPACE, uap->path, ctx);
 943         error = namei(&nd);
 944         if (error)
 945                 return (error);
 946         vp = nd.ni_vp;
 947         mp = vp->v_mount;
 948         nameidone(&nd);
 949
 950 #if CONFIG_MACF
 951         error = mac_mount_check_umount(ctx, mp);
 952         if (error != 0) {
 953                 vnode_put(vp);
 954                 return (error);
 955         }
 956 #endif
 957         /*
 958          * Must be the root of the filesystem
 959          */
 960         if ((vp->v_flag & VROOT) == 0) {
 961                 vnode_put(vp);
 962                 return (EINVAL);
 963         }
 964         mount_ref(mp, 0);
 965         vnode_put(vp);
 966         /* safedounmount consumes the mount ref */
 967         return (safedounmount(mp, uap->flags, ctx));
 968 }
 969
 970 int
 971 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
 972 {
 973         mount_t mp;
 974
 975         mp = mount_list_lookupby_fsid(fsid, 0, 1);
 976         if (mp == (mount_t)0) {
 977                 return(ENOENT);
 978         }
 979         mount_ref(mp, 0);
 980         mount_iterdrop(mp);
 981         /* safedounmount consumes the mount ref */
 982         return(safedounmount(mp, flags, ctx));
 983 }
 984
 985
 986 /*
 987  * The mount struct comes with a mount ref which will be consumed.
 988  * Do the actual file system unmount, prevent some common foot shooting.
 989  */
 990 int
 991 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
 992 {
 993         int error;
 994         proc_t p = vfs_context_proc(ctx);
 995
 996         /*
 997          * Only root, or the user that did the original mount is
 998          * permitted to unmount this filesystem.
 999          */
1000         if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1001             (error = suser(kauth_cred_get(), &p->p_acflag)))
1002                 goto out;
1003
1004         /*
1005          * Don't allow unmounting the root file system.
1006          */
1007         if (mp->mnt_flag & MNT_ROOTFS) {
1008                 error = EBUSY; /* the root is always busy */
1009                 goto out;
1010         }
1011
1012         return (dounmount(mp, flags, 1, ctx));
1013
1014 out:
1015         mount_drop(mp, 0);
1016         return(error);
1017 }
1018
/*
 * Do the actual file system unmount.
 *
 * Parameters:
 *      mp      mount to tear down
 *      flags   unmount flags; MNT_FORCE selects a forced unmount
 *      withref non-zero when the caller holds a mount ref on mp that this
 *              routine must drop (it is dropped on every path below)
 *      ctx     context passed to VFS_SYNC, VFS_UNMOUNT and VNOP_CLOSE
 *
 * Returns 0 on success.  Returns EBUSY when another unmount of mp was
 * already in progress (after sleeping until woken).  Otherwise returns
 * the error from VFS_SYNC() or vflush() (non-forced unmounts only) or
 * from VFS_UNMOUNT(); on those paths the unmount-in-progress flags are
 * cleared again and mp stays mounted.
 *
 * On success the mount structure is either freed here or, when cross
 * references remain (mnt_crossref != 0), the covered vnode is marked
 * VL_MOUNTDEAD and mp is left allocated.
 */
int
dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
{
        vnode_t coveredvp = (vnode_t)0;
        int error;
        int needwakeup = 0;
        int forcedunmount = 0;
        int lflags = 0;
        struct vnode *devvp = NULLVP;

        if (flags & MNT_FORCE)
                forcedunmount = 1;
        mount_lock(mp);
        /* XXX post jaguar fix LK_DRAIN - then clean this up */
        if ((flags & MNT_FORCE)) {
                mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
                mp->mnt_lflag |= MNT_LFORCE;
        }
        if (mp->mnt_lflag & MNT_LUNMOUNT) {
                /*
                 * Somebody else is already unmounting mp: ask to be woken,
                 * give up our ref and sleep.  PDROP means the mount mutex
                 * is not held again when msleep() returns.
                 */
                mp->mnt_lflag |= MNT_LWAIT;
                if(withref != 0)
                        mount_drop(mp, 1);
                msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
                /*
                 * The prior unmount attempt has probably succeeded.
                 * Do not dereference mp here - returning EBUSY is safest.
                 */
                return (EBUSY);
        }
        /* We are now the unmounting thread; mark the mount accordingly. */
        mp->mnt_kern_flag |= MNTK_UNMOUNT;
        mp->mnt_lflag |= MNT_LUNMOUNT;
        mp->mnt_flag &=~ MNT_ASYNC;
        /*
         * anyone currently in the fast path that
         * trips over the cached rootvp will be
         * dumped out and forced into the slow path
         * to regenerate a new cached value
         */
        mp->mnt_realrootvp = NULLVP;
        mount_unlock(mp);

        /*
         * taking the name_cache_lock exclusively will
         * insure that everyone is out of the fast path who
         * might be trying to use a now stale copy of
         * vp->v_mountedhere->mnt_realrootvp
         * bumping mount_generation causes the cached values
         * to be invalidated
         */
        name_cache_lock();
        mount_generation++;
        name_cache_unlock();


        lck_rw_lock_exclusive(&mp->mnt_rwlock);
        /* The caller's ref is released once mnt_rwlock is held exclusively. */
        if (withref != 0)
                mount_drop(mp, 0);
#if CONFIG_FSE
        fsevent_unmount(mp);  /* has to come first! */
#endif
        error = 0;
        if (forcedunmount == 0) {
                ubc_umount(mp); /* release cached vnodes */
                if ((mp->mnt_flag & MNT_RDONLY) == 0) {
                        error = VFS_SYNC(mp, MNT_WAIT, ctx);
                        if (error) {
                                /* Back out: mp stays mounted.  'out' expects the mount lock held. */
                                mount_lock(mp);
                                mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
                                mp->mnt_lflag &= ~MNT_LUNMOUNT;
                                mp->mnt_lflag &= ~MNT_LFORCE;
                                goto out;
                        }
                }
        }

        if (forcedunmount)
                lflags |= FORCECLOSE;
        error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
        /* A vflush() failure only aborts a non-forced unmount. */
        if ((forcedunmount == 0) && error) {
                mount_lock(mp);
                mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
                mp->mnt_lflag &= ~MNT_LUNMOUNT;
                mp->mnt_lflag &= ~MNT_LFORCE;
                goto out;
        }

        /* make sure no one is left in the mount iterations or lookup */
        mount_iterdrain(mp);

        error = VFS_UNMOUNT(mp, flags, ctx);
        if (error) {
                /* The file system refused: undo the iterdrain and back out. */
                mount_iterreset(mp);
                mount_lock(mp);
                mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
                mp->mnt_lflag &= ~MNT_LUNMOUNT;
                mp->mnt_lflag &= ~MNT_LFORCE;
                goto out;
        }

        /* increment the operations count */
        /* (error is always 0 here; the failure case jumped to 'out' above) */
        if (!error)
                OSAddAtomic(1, (SInt32 *)&vfs_nummntops);

        if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
                /* hold an io reference and drop the usecount before close */
                devvp = mp->mnt_devvp;
                vnode_clearmountedon(devvp);
                vnode_getalways(devvp);
                vnode_rele(devvp);
                VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
                       ctx);
                vnode_put(devvp);
        }
        /* mnt_rwlock is released around mount_list_remove() and then retaken. */
        lck_rw_done(&mp->mnt_rwlock);
        mount_list_remove(mp);
        lck_rw_lock_exclusive(&mp->mnt_rwlock);

        /* mark the mount point hook in the vp but not drop the ref yet */
        if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
                        vnode_getwithref(coveredvp);
                        vnode_lock_spin(coveredvp);
                        coveredvp->v_mountedhere = (struct mount *)0;
                        vnode_unlock(coveredvp);
                        vnode_put(coveredvp);
        }

        mount_list_lock();
        mp->mnt_vtable->vfc_refcount--;
        mount_list_unlock();

        cache_purgevfs(mp);     /* remove cache entries for this file sys */
        vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
        mount_lock(mp);
        mp->mnt_lflag |= MNT_LDEAD;

        if (mp->mnt_lflag & MNT_LWAIT) {
                /*
                 * do the wakeup here
                 * in case we block in mount_refdrain
                 * which will drop the mount lock
                 * and allow anyone blocked in vfs_busy
                 * to wakeup and see the LDEAD state
                 */
                mp->mnt_lflag &= ~MNT_LWAIT;
                wakeup((caddr_t)mp);
        }
        mount_refdrain(mp);
out:
        /*
         * Common exit.  Every error path took the mount lock before
         * jumping here and the success path took it above; mnt_rwlock is
         * held exclusively on all of them.
         */
        if (mp->mnt_lflag & MNT_LWAIT) {
                mp->mnt_lflag &= ~MNT_LWAIT;
                needwakeup = 1;
        }
        mount_unlock(mp);
        lck_rw_done(&mp->mnt_rwlock);

        /* Wake waiters only after both locks have been released. */
        if (needwakeup)
                wakeup((caddr_t)mp);
        if (!error) {
                if ((coveredvp != NULLVP)) {
                        /* Now drop the use reference on the covered vnode. */
                        vnode_getwithref(coveredvp);
                        vnode_rele(coveredvp);
                        vnode_lock_spin(coveredvp);
                        if(mp->mnt_crossref == 0) {
                                /* No cross references left: mp can be freed now. */
                                vnode_unlock(coveredvp);
                                mount_lock_destroy(mp);
#if CONFIG_MACF
                                mac_mount_label_destroy(mp);
#endif
                                FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
                        }  else {
                                /*
                                 * Cross references remain: flag the covered
                                 * vnode so that mount_dropcrossref() frees mp
                                 * when the last one goes away.
                                 */
                                coveredvp->v_lflag |= VL_MOUNTDEAD;
                                vnode_unlock(coveredvp);
                        }
                        vnode_put(coveredvp);
                } else if (mp->mnt_flag & MNT_ROOTFS) {
                                /* The root file system has no covered vnode. */
                                mount_lock_destroy(mp);
#if CONFIG_MACF
                                mac_mount_label_destroy(mp);
#endif
                                FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
                } else
                        panic("dounmount: no coveredvp");
        }
        return (error);
}
1207
1208 void
1209 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
1210 {
1211                 vnode_lock(dp);
1212                 mp->mnt_crossref--;
1213                 if (mp->mnt_crossref < 0)
1214                         panic("mount cross refs -ve");
1215                 if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) {
1216                         dp->v_lflag &= ~VL_MOUNTDEAD;
1217                         if (need_put)
1218                                 vnode_put_locked(dp);
1219                         vnode_unlock(dp);
1220                         mount_lock_destroy(mp);
1221 #if CONFIG_MACF
1222                         mac_mount_label_destroy(mp);
1223 #endif
1224                         FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1225                         return;
1226                 }
1227                 if (need_put)
1228                         vnode_put_locked(dp);
1229                 vnode_unlock(dp);
1230 }
1231
1232
/*
 * Sync each mounted filesystem.
 */
#if DIAGNOSTIC
/* When non-zero, sync() calls vfs_bufstats() after iterating the mounts. */
int syncprt = 0;
/* Debug-control entry naming syncprt (presumably what makes it tunable -- confirm). */
struct ctldebug debug0 = { "syncprt", &syncprt };
#endif

/* When non-zero, sync() calls vm_countdirtypages() and prints the paging counters. */
int print_vmpage_stat=0;
1242
1243 static int
1244 sync_callback(mount_t mp, __unused void * arg)
1245 {
1246         int asyncflag;
1247
1248         if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1249                         asyncflag = mp->mnt_flag & MNT_ASYNC;
1250                         mp->mnt_flag &= ~MNT_ASYNC;
1251                         VFS_SYNC(mp, MNT_NOWAIT, vfs_context_current());
1252                         if (asyncflag)
1253                                 mp->mnt_flag |= MNT_ASYNC;
1254         }
1255         return(VFS_RETURNED);
1256 }
1257
1258
/*
 * Counters defined outside this file (presumably pager statistics --
 * confirm); sync() prints them when print_vmpage_stat is non-zero.
 */
extern unsigned int vp_pagein, vp_pgodirty, vp_pgoclean;
extern unsigned int dp_pgins, dp_pgouts;
1261
1262 /* ARGSUSED */
1263 int
1264 sync(__unused proc_t p, __unused struct sync_args *uap, __unused register_t *retval)
1265 {
1266
1267         vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
1268         {
1269         if(print_vmpage_stat) {
1270                 vm_countdirtypages();
1271                 printf("VP: %d: %d: %d: %d: %d\n", vp_pgodirty, vp_pgoclean, vp_pagein,
1272                         dp_pgins, dp_pgouts);
1273         }
1274         }
1275 #if DIAGNOSTIC
1276         if (syncprt)
1277                 vfs_bufstats();
1278 #endif /* DIAGNOSTIC */
1279         return (0);
1280 }
1281
1282 /*
1283  * Change filesystem quotas.
1284  */
1285 #if QUOTA
1286 static int quotactl_funneled(proc_t p, struct quotactl_args *uap, register_t *retval);
1287
1288 int
1289 quotactl(proc_t p, struct quotactl_args *uap, register_t *retval)
1290 {
1291         boolean_t funnel_state;
1292         int error;
1293
1294         funnel_state = thread_funnel_set(kernel_flock, TRUE);
1295         error = quotactl_funneled(p, uap, retval);
1296         thread_funnel_set(kernel_flock, funnel_state);
1297         return(error);
1298 }
1299
1300 static int
1301 quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused register_t *retval)
1302 {
1303         struct mount *mp;
1304         int error, quota_cmd, quota_status;
1305         caddr_t datap;
1306         size_t fnamelen;
1307         struct nameidata nd;
1308         vfs_context_t ctx = vfs_context_current();
1309         struct dqblk my_dqblk;
1310
1311         AUDIT_ARG(uid, uap->uid, 0, 0, 0);
1312         AUDIT_ARG(cmd, uap->cmd);
1313         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1314                 UIO_USERSPACE, uap->path, ctx);
1315         error = namei(&nd);
1316         if (error)
1317                 return (error);
1318         mp = nd.ni_vp->v_mount;
1319         vnode_put(nd.ni_vp);
1320         nameidone(&nd);
1321
1322         /* copyin any data we will need for downstream code */
1323         quota_cmd = uap->cmd >> SUBCMDSHIFT;
1324
1325         switch (quota_cmd) {
1326         case Q_QUOTAON:
1327                 /* uap->arg specifies a file from which to take the quotas */
1328                 fnamelen = MAXPATHLEN;
1329                 datap = kalloc(MAXPATHLEN);
1330                 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
1331                 break;
1332         case Q_GETQUOTA:
1333                 /* uap->arg is a pointer to a dqblk structure. */
1334                 datap = (caddr_t) &my_dqblk;
1335                 break;
1336         case Q_SETQUOTA:
1337         case Q_SETUSE:
1338                 /* uap->arg is a pointer to a dqblk structure. */
1339                 datap = (caddr_t) &my_dqblk;
1340                 if (proc_is64bit(p)) {
1341                         struct user_dqblk       my_dqblk64;
1342                         error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
1343                         if (error == 0) {
1344                                 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
1345                         }
1346                 }
1347                 else {
1348                         error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
1349                 }
1350                 break;
1351         case Q_QUOTASTAT:
1352                 /* uap->arg is a pointer to an integer */
1353                 datap = (caddr_t) &quota_status;
1354                 break;
1355         default:
1356                 datap = NULL;
1357                 break;
1358         } /* switch */
1359
1360         if (error == 0) {
1361                 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
1362         }
1363
1364         switch (quota_cmd) {
1365         case Q_QUOTAON:
1366                 if (datap != NULL)
1367                         kfree(datap, MAXPATHLEN);
1368                 break;
1369         case Q_GETQUOTA:
1370                 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
1371                 if (error == 0) {
1372                         if (proc_is64bit(p)) {
1373                                 struct user_dqblk       my_dqblk64;
1374                                 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
1375                                 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
1376                         }
1377                         else {
1378                                 error = copyout(datap, uap->arg, sizeof (struct dqblk));
1379                         }
1380                 }
1381                 break;
1382         case Q_QUOTASTAT:
1383                 /* uap->arg is a pointer to an integer */
1384                 if (error == 0) {
1385                         error = copyout(datap, uap->arg, sizeof(quota_status));
1386                 }
1387                 break;
1388         default:
1389                 break;
1390         } /* switch */
1391
1392         return (error);
1393 }
1394 #else
1395 int
1396 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused register_t *retval)
1397 {
1398         return (EOPNOTSUPP);
1399 }
1400 #endif /* QUOTA */
1401
1402 /*
1403  * Get filesystem statistics.
1404  *
1405  * Returns:     0                       Success
1406  *      namei:???
1407  *      vfs_update_vfsstat:???
1408  *      munge_statfs:EFAULT
1409  */
1410 /* ARGSUSED */
1411 int
1412 statfs(__unused proc_t p, struct statfs_args *uap, __unused register_t *retval)
1413 {
1414         struct mount *mp;
1415         struct vfsstatfs *sp;
1416         int error;
1417         struct nameidata nd;
1418         vfs_context_t ctx = vfs_context_current();
1419         vnode_t vp;
1420
1421         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1422                 UIO_USERSPACE, uap->path, ctx);
1423         error = namei(&nd);
1424         if (error)
1425                 return (error);
1426         vp = nd.ni_vp;
1427         mp = vp->v_mount;
1428         sp = &mp->mnt_vfsstat;
1429         nameidone(&nd);
1430
1431         error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
1432         vnode_put(vp);
1433         if (error != 0)
1434                 return (error);
1435
1436         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1437         return (error);
1438 }
1439
1440 /*
1441  * Get filesystem statistics.
1442  */
1443 /* ARGSUSED */
1444 int
1445 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused register_t *retval)
1446 {
1447         vnode_t vp;
1448         struct mount *mp;
1449         struct vfsstatfs *sp;
1450         int error;
1451
1452         AUDIT_ARG(fd, uap->fd);
1453
1454         if ( (error = file_vnode(uap->fd, &vp)) )
1455                 return (error);
1456
1457         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1458
1459         mp = vp->v_mount;
1460         if (!mp) {
1461                 file_drop(uap->fd);
1462                 return (EBADF);
1463         }
1464         sp = &mp->mnt_vfsstat;
1465         if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
1466                 file_drop(uap->fd);
1467                 return (error);
1468         }
1469         file_drop(uap->fd);
1470
1471         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1472
1473         return (error);
1474 }
1475
1476 /*
1477  * Common routine to handle copying of statfs64 data to user space
1478  */
1479 static int
1480 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
1481 {
1482         int error;
1483         struct statfs64 sfs;
1484
1485         bzero(&sfs, sizeof(sfs));
1486
1487         sfs.f_bsize = sfsp->f_bsize;
1488         sfs.f_iosize = (int32_t)sfsp->f_iosize;
1489         sfs.f_blocks = sfsp->f_blocks;
1490         sfs.f_bfree = sfsp->f_bfree;
1491         sfs.f_bavail = sfsp->f_bavail;
1492         sfs.f_files = sfsp->f_files;
1493         sfs.f_ffree = sfsp->f_ffree;
1494         sfs.f_fsid = sfsp->f_fsid;
1495         sfs.f_owner = sfsp->f_owner;
1496         sfs.f_type = mp->mnt_vtable->vfc_typenum;
1497         sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1498         sfs.f_fssubtype = sfsp->f_fssubtype;
1499         strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
1500         strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
1501         strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
1502
1503         error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
1504
1505         return(error);
1506 }
1507
1508 /*
1509  * Get file system statistics in 64-bit mode
1510  */
1511 int
1512 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused register_t *retval)
1513 {
1514         struct mount *mp;
1515         struct vfsstatfs *sp;
1516         int error;
1517         struct nameidata nd;
1518         vfs_context_t ctxp = vfs_context_current();
1519         vnode_t vp;
1520
1521         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1522                 UIO_USERSPACE, uap->path, ctxp);
1523         error = namei(&nd);
1524         if (error)
1525                 return (error);
1526         vp = nd.ni_vp;
1527         mp = vp->v_mount;
1528         sp = &mp->mnt_vfsstat;
1529         nameidone(&nd);
1530
1531         error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
1532         vnode_put(vp);
1533         if (error != 0)
1534                 return (error);
1535
1536         error = statfs64_common(mp, sp, uap->buf);
1537
1538         return (error);
1539 }
1540
1541 /*
1542  * Get file system statistics in 64-bit mode
1543  */
1544 int
1545 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused register_t *retval)
1546 {
1547         struct vnode *vp;
1548         struct mount *mp;
1549         struct vfsstatfs *sp;
1550         int error;
1551
1552         AUDIT_ARG(fd, uap->fd);
1553
1554         if ( (error = file_vnode(uap->fd, &vp)) )
1555                 return (error);
1556
1557         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1558
1559         mp = vp->v_mount;
1560         if (!mp) {
1561                 file_drop(uap->fd);
1562                 return (EBADF);
1563         }
1564         sp = &mp->mnt_vfsstat;
1565         if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
1566                 file_drop(uap->fd);
1567                 return (error);
1568         }
1569         file_drop(uap->fd);
1570
1571         error = statfs64_common(mp, sp, uap->buf);
1572
1573         return (error);
1574 }
1575
/*
 * State shared between the getfsstat code and getfsstat_callback() while
 * the mounts are being iterated.
 */
struct getfsstat_struct {
        user_addr_t     sfsp;           /* user address for the next statfs record; 0 means count only */
        user_addr_t     *mp;            /* cursor over user addresses given to mac_mount_label_get(); NULL to skip */
        int             count;          /* incremented by the callback for each mount it completes */
        int             maxcount;       /* records are copied only while count < maxcount */
        int             flags;          /* MNT_NOWAIT / MNT_WAIT: whether to refresh the cached stats */
        int             error;          /* error that made the callback stop the iteration */
};
1584
1585
1586 static int
1587 getfsstat_callback(mount_t mp, void * arg)
1588 {
1589
1590         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1591         struct vfsstatfs *sp;
1592         int error, my_size;
1593         vfs_context_t ctx = vfs_context_current();
1594
1595         if (fstp->sfsp && fstp->count < fstp->maxcount) {
1596                 sp = &mp->mnt_vfsstat;
1597                 /*
1598                  * If MNT_NOWAIT is specified, do not refresh the
1599                  * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
1600                  */
1601                 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) &&
1602                         (error = vfs_update_vfsstat(mp, ctx,
1603                             VFS_USER_EVENT))) {
1604                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1605                         return(VFS_RETURNED);
1606                 }
1607
1608                 /*
1609                  * Need to handle LP64 version of struct statfs
1610                  */
1611                 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
1612                 if (error) {
1613                         fstp->error = error;
1614                         return(VFS_RETURNED_DONE);
1615                 }
1616                 fstp->sfsp += my_size;
1617
1618                 if (fstp->mp) {
1619                         error = mac_mount_label_get(mp, *fstp->mp);
1620                         if (error) {
1621                                 fstp->error = error;
1622                                 return(VFS_RETURNED_DONE);
1623                         }
1624                         fstp->mp++;
1625                 }
1626         }
1627         fstp->count++;
1628         return(VFS_RETURNED);
1629 }
1630
1631 /*
1632  * Get statistics on all filesystems.
1633  */
1634 int
1635 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
1636 {
1637         struct __mac_getfsstat_args muap;
1638
1639         muap.buf = uap->buf;
1640         muap.bufsize = uap->bufsize;
1641         muap.mac = USER_ADDR_NULL;
1642         muap.macsize = 0;
1643         muap.flags = uap->flags;
1644
1645         return (__mac_getfsstat(p, &muap, retval));
1646 }
1647
1648 int
1649 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
1650 {
1651         user_addr_t sfsp;
1652         user_addr_t *mp;
1653         int count, maxcount;
1654         struct getfsstat_struct fst;
1655
1656         if (IS_64BIT_PROCESS(p)) {
1657                 maxcount = uap->bufsize / sizeof(struct user_statfs);
1658         }
1659         else {
1660                 maxcount = uap->bufsize / sizeof(struct statfs);
1661         }
1662         sfsp = uap->buf;
1663         count = 0;
1664
1665         mp = NULL;
1666
1667 #if CONFIG_MACF
1668         if (uap->mac != USER_ADDR_NULL) {
1669                 u_int32_t *mp0;
1670                 int error;
1671                 int i;
1672
1673                 count = (int)(uap->macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
1674                 if (count != maxcount)
1675                         return (EINVAL);
1676
1677                 /* Copy in the array */
1678                 MALLOC(mp0, u_int32_t *, uap->macsize, M_MACTEMP, M_WAITOK);
1679                 error = copyin(uap->mac, mp0, uap->macsize);
1680                 if (error)
1681                         return (error);
1682
1683                 /* Normalize to an array of user_addr_t */
1684                 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
1685                 for (i = 0; i < count; i++) {
1686                         if (IS_64BIT_PROCESS(p))
1687                                 mp[i] = ((user_addr_t *)mp0)[i];
1688                         else
1689                                 mp[i] = (user_addr_t)mp0[i];
1690                 }
1691                 FREE(mp0, M_MACTEMP);
1692         }
1693 #endif
1694
1695
1696         fst.sfsp = sfsp;
1697         fst.mp = mp;
1698         fst.flags = uap->flags;
1699         fst.count = 0;
1700         fst.error = 0;
1701         fst.maxcount = maxcount;
1702
1703
1704         vfs_iterate(0, getfsstat_callback, &fst);
1705
1706         if (mp)
1707                 FREE(mp, M_MACTEMP);
1708
1709         if (fst.error ) {
1710                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1711                 return(fst.error);
1712         }
1713
1714         if (fst.sfsp && fst.count > fst.maxcount)
1715                 *retval = fst.maxcount;
1716         else
1717                 *retval = fst.count;
1718         return (0);
1719 }
1720
1721 static int
1722 getfsstat64_callback(mount_t mp, void * arg)
1723 {
1724         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1725         struct vfsstatfs *sp;
1726         int error;
1727
1728         if (fstp->sfsp && fstp->count < fstp->maxcount) {
1729                 sp = &mp->mnt_vfsstat;
1730                 /*
1731                  * If MNT_NOWAIT is specified, do not refresh the
1732                  * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
1733                  */
1734                 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) &&
1735                     (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
1736                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1737                         return(VFS_RETURNED);
1738                 }
1739
1740                 error = statfs64_common(mp, sp, fstp->sfsp);
1741                 if (error) {
1742                         fstp->error = error;
1743                         return(VFS_RETURNED_DONE);
1744                 }
1745                 fstp->sfsp += sizeof(struct statfs64);
1746         }
1747         fstp->count++;
1748         return(VFS_RETURNED);
1749 }
1750
1751 /*
1752  * Get statistics on all file systems in 64 bit mode.
1753  */
1754 int
1755 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
1756 {
1757         user_addr_t sfsp;
1758         int count, maxcount;
1759         struct getfsstat_struct fst;
1760
1761         maxcount = uap->bufsize / sizeof(struct statfs64);
1762
1763         sfsp = uap->buf;
1764         count = 0;
1765
1766         fst.sfsp = sfsp;
1767         fst.flags = uap->flags;
1768         fst.count = 0;
1769         fst.error = 0;
1770         fst.maxcount = maxcount;
1771
1772         vfs_iterate(0, getfsstat64_callback, &fst);
1773
1774         if (fst.error ) {
1775                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1776                 return(fst.error);
1777         }
1778
1779         if (fst.sfsp && fst.count > fst.maxcount)
1780                 *retval = fst.maxcount;
1781         else
1782                 *retval = fst.count;
1783
1784         return (0);
1785 }
1786
#if COMPAT_GETFSSTAT
/*
 * Compatibility entry point for the old getfsstat interface; it is not
 * implemented and always fails with ENOTSUP.
 */
int
ogetfsstat(__unused proc_t p, __unused struct getfsstat_args *uap, __unused register_t *retval)
{
        return (ENOTSUP);
}
#endif
1793
1794 /*
1795  * Change current working directory to a given file descriptor.
1796  */
1797 /* ARGSUSED */
1798 static int
1799 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1800 {
1801         struct filedesc *fdp = p->p_fd;
1802         vnode_t vp;
1803         vnode_t tdp;
1804         vnode_t tvp;
1805         struct mount *mp;
1806         int error;
1807         vfs_context_t ctx = vfs_context_current();
1808
1809         if (per_thread && uap->fd == -1) {
1810                 /*
1811                  * Switching back from per-thread to per process CWD; verify we
1812                  * in fact have one before proceeding.  The only success case
1813                  * for this code path is to return 0 preemptively after zapping
1814                  * the thread structure contents.
1815                  */
1816                 thread_t th = vfs_context_thread(ctx);
1817                 if (th) {
1818                         uthread_t uth = get_bsdthread_info(th);
1819                         tvp = uth->uu_cdir;
1820                         uth->uu_cdir = NULLVP;
1821                         if (tvp != NULLVP) {
1822                                 vnode_rele(tvp);
1823                                 return (0);
1824                         }
1825                 }
1826                 return (EBADF);
1827         }
1828
1829         if ( (error = file_vnode(uap->fd, &vp)) )
1830                 return(error);
1831         if ( (error = vnode_getwithref(vp)) ) {
1832                 file_drop(uap->fd);
1833                 return(error);
1834         }
1835
1836         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1837
1838         if (vp->v_type != VDIR) {
1839                 error = ENOTDIR;
1840                 goto out;
1841         }
1842
1843 #if CONFIG_MACF
1844         error = mac_vnode_check_chdir(ctx, vp);
1845         if (error)
1846                 goto out;
1847 #endif
1848         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
1849         if (error)
1850                 goto out;
1851
1852         while (!error && (mp = vp->v_mountedhere) != NULL) {
1853                 if (vfs_busy(mp, LK_NOWAIT)) {
1854                         error = EACCES;
1855                         goto out;
1856                 }
1857                 error = VFS_ROOT(mp, &tdp, ctx);
1858                 vfs_unbusy(mp);
1859                 if (error)
1860                         break;
1861                 vnode_put(vp);
1862                 vp = tdp;
1863         }
1864         if (error)
1865                 goto out;
1866         if ( (error = vnode_ref(vp)) )
1867                 goto out;
1868         vnode_put(vp);
1869
1870         if (per_thread) {
1871                 thread_t th = vfs_context_thread(ctx);
1872                 if (th) {
1873                         uthread_t uth = get_bsdthread_info(th);
1874                         tvp = uth->uu_cdir;
1875                         uth->uu_cdir = vp;
1876                         OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag);
1877                 } else {
1878                         vnode_rele(vp);
1879                         return (ENOENT);
1880                 }
1881         } else {
1882                 proc_fdlock(p);
1883                 tvp = fdp->fd_cdir;
1884                 fdp->fd_cdir = vp;
1885                 proc_fdunlock(p);
1886         }
1887
1888         if (tvp)
1889                 vnode_rele(tvp);
1890         file_drop(uap->fd);
1891
1892         return (0);
1893 out:
1894         vnode_put(vp);
1895         file_drop(uap->fd);
1896
1897         return(error);
1898 }
1899
1900 int
1901 fchdir(proc_t p, struct fchdir_args *uap, __unused register_t *retval)
1902 {
1903         return common_fchdir(p, uap, 0);
1904 }
1905
1906 int
1907 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused register_t *retval)
1908 {
1909         return common_fchdir(p, (void *)uap, 1);
1910 }
1911
1912 /*
1913  * Change current working directory (``.'').
1914  *
1915  * Returns:     0                       Success
1916  *      change_dir:ENOTDIR
1917  *      change_dir:???
1918  *      vnode_ref:ENOENT                No such file or directory
1919  */
1920 /* ARGSUSED */
1921 static int
1922 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1923 {
1924         struct filedesc *fdp = p->p_fd;
1925         int error;
1926         struct nameidata nd;
1927         vnode_t tvp;
1928         vfs_context_t ctx = vfs_context_current();
1929
1930         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1931                 UIO_USERSPACE, uap->path, ctx);
1932         error = change_dir(&nd, ctx);
1933         if (error)
1934                 return (error);
1935         if ( (error = vnode_ref(nd.ni_vp)) ) {
1936                 vnode_put(nd.ni_vp);
1937                 return (error);
1938         }
1939         /*
1940          * drop the iocount we picked up in change_dir
1941          */
1942         vnode_put(nd.ni_vp);
1943
1944         if (per_thread) {
1945                 thread_t th = vfs_context_thread(ctx);
1946                 if (th) {
1947                         uthread_t uth = get_bsdthread_info(th);
1948                         tvp = uth->uu_cdir;
1949                         uth->uu_cdir = nd.ni_vp;
1950                         OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag);
1951                 } else {
1952                         vnode_rele(nd.ni_vp);
1953                         return (ENOENT);
1954                 }
1955         } else {
1956                 proc_fdlock(p);
1957                 tvp = fdp->fd_cdir;
1958                 fdp->fd_cdir = nd.ni_vp;
1959                 proc_fdunlock(p);
1960         }
1961
1962         if (tvp)
1963                 vnode_rele(tvp);
1964
1965         return (0);
1966 }
1967
1968 int
1969 chdir(proc_t p, struct chdir_args *uap, __unused register_t *retval)
1970 {
1971         return common_chdir(p, (void *)uap, 0);
1972 }
1973
1974 int
1975 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused register_t *retval)
1976 {
1977         return common_chdir(p, (void *)uap, 1);
1978 }
1979
1980
1981 /*
1982  * Change notion of root (``/'') directory.
1983  */
1984 /* ARGSUSED */
1985 int
1986 chroot(proc_t p, struct chroot_args *uap, __unused register_t *retval)
1987 {
1988         struct filedesc *fdp = p->p_fd;
1989         int error;
1990         struct nameidata nd;
1991         vnode_t tvp;
1992         vfs_context_t ctx = vfs_context_current();
1993
1994         if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1995                 return (error);
1996
1997         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1998                 UIO_USERSPACE, uap->path, ctx);
1999         error = change_dir(&nd, ctx);
2000         if (error)
2001                 return (error);
2002
2003 #if CONFIG_MACF
2004         error = mac_vnode_check_chroot(ctx, nd.ni_vp,
2005             &nd.ni_cnd);
2006         if (error) {
2007                 vnode_put(nd.ni_vp);
2008                 return (error);
2009         }
2010 #endif
2011
2012         if ( (error = vnode_ref(nd.ni_vp)) ) {
2013                 vnode_put(nd.ni_vp);
2014                 return (error);
2015         }
2016         vnode_put(nd.ni_vp);
2017
2018         proc_fdlock(p);
2019         tvp = fdp->fd_rdir;
2020         fdp->fd_rdir = nd.ni_vp;
2021         fdp->fd_flags |= FD_CHROOT;
2022         proc_fdunlock(p);
2023
2024         if (tvp != NULL)
2025                 vnode_rele(tvp);
2026
2027         return (0);
2028 }
2029
2030 /*
2031  * Common routine for chroot and chdir.
2032  *
2033  * Returns:     0                       Success
2034  *              ENOTDIR                 Not a directory
2035  *              namei:???               [anything namei can return]
2036  *              vnode_authorize:???     [anything vnode_authorize can return]
2037  */
2038 static int
2039 change_dir(struct nameidata *ndp, vfs_context_t ctx)
2040 {
2041         vnode_t vp;
2042         int error;
2043
2044         if ((error = namei(ndp)))
2045                 return (error);
2046         nameidone(ndp);
2047         vp = ndp->ni_vp;
2048
2049         if (vp->v_type != VDIR) {
2050                 vnode_put(vp);
2051                 return (ENOTDIR);
2052         }
2053
2054 #if CONFIG_MACF
2055         error = mac_vnode_check_chdir(ctx, vp);
2056         if (error) {
2057                 vnode_put(vp);
2058                 return (error);
2059         }
2060 #endif
2061
2062         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2063         if (error) {
2064                 vnode_put(vp);
2065                 return (error);
2066         }
2067
2068         return (error);
2069 }
2070
2071 /*
2072  * Check permissions, allocate an open file structure,
2073  * and call the device open routine if any.
2074  *
2075  * Returns:     0                       Success
2076  *              EINVAL
2077  *              EINTR
2078  *      falloc:ENFILE
2079  *      falloc:EMFILE
2080  *      falloc:ENOMEM
2081  *      vn_open_auth:???
2082  *      dupfdopen:???
2083  *      VNOP_ADVLOCK:???
2084  *      vnode_setsize:???
2085  */
2086 #warning XXX implement uid, gid
2087 int
2088 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, register_t *retval)
2089 {
2090         proc_t p = vfs_context_proc(ctx);
2091         uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2092         struct filedesc *fdp = p->p_fd;
2093         struct fileproc *fp;
2094         vnode_t vp;
2095         int flags, oflags;
2096         struct fileproc *nfp;
2097         int type, indx, error;
2098         struct flock lf;
2099         int no_controlling_tty = 0;
2100         int deny_controlling_tty = 0;
2101         struct session *sessp = SESSION_NULL;
2102         struct vfs_context context = *vfs_context_current();    /* local copy */
2103
2104         oflags = uflags;
2105
2106         if ((oflags & O_ACCMODE) == O_ACCMODE)
2107                 return(EINVAL);
2108         flags = FFLAGS(uflags);
2109
2110         AUDIT_ARG(fflags, oflags);
2111         AUDIT_ARG(mode, vap->va_mode);
2112
2113         if ( (error = falloc(p, &nfp, &indx, ctx)) ) {
2114                 return (error);
2115         }
2116         fp = nfp;
2117         uu->uu_dupfd = -indx - 1;
2118
2119         if (!(p->p_flag & P_CONTROLT)) {
2120                 sessp = proc_session(p);
2121                 no_controlling_tty = 1;
2122                 /*
2123                  * If conditions would warrant getting a controlling tty if
2124                  * the device being opened is a tty (see ttyopen in tty.c),
2125                  * but the open flags deny it, set a flag in the session to
2126                  * prevent it.
2127                  */
2128                 if (SESS_LEADER(p, sessp) &&
2129                     sessp->s_ttyvp == NULL &&
2130                     (flags & O_NOCTTY)) {
2131                         session_lock(sessp);
2132                         sessp->s_flags |= S_NOCTTY;
2133                         session_unlock(sessp);
2134                         deny_controlling_tty = 1;
2135                 }
2136         }
2137
2138         if ((error = vn_open_auth(ndp, &flags, vap))) {
2139                 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){        /* XXX from fdopen */
2140                         if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) {
2141                                 fp_drop(p, indx, NULL, 0);
2142                                 *retval = indx;
2143                                 if (deny_controlling_tty) {
2144                                         session_lock(sessp);
2145                                         sessp->s_flags &= ~S_NOCTTY;
2146                                         session_unlock(sessp);
2147                                 }
2148                                 if (sessp != SESSION_NULL)
2149                                         session_rele(sessp);
2150                                 return (0);
2151                         }
2152                 }
2153                 if (error == ERESTART)
2154                         error = EINTR;
2155                 fp_free(p, indx, fp);
2156
2157                 if (deny_controlling_tty) {
2158                         session_lock(sessp);
2159                         sessp->s_flags &= ~S_NOCTTY;
2160                         session_unlock(sessp);
2161                 }
2162                 if (sessp != SESSION_NULL)
2163                         session_rele(sessp);
2164                 return (error);
2165         }
2166         uu->uu_dupfd = 0;
2167         vp = ndp->ni_vp;
2168
2169         fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
2170         fp->f_fglob->fg_type = DTYPE_VNODE;
2171         fp->f_fglob->fg_ops = &vnops;
2172         fp->f_fglob->fg_data = (caddr_t)vp;
2173
2174         if (flags & (O_EXLOCK | O_SHLOCK)) {
2175                 lf.l_whence = SEEK_SET;
2176                 lf.l_start = 0;
2177                 lf.l_len = 0;
2178                 if (flags & O_EXLOCK)
2179                         lf.l_type = F_WRLCK;
2180                 else
2181                         lf.l_type = F_RDLCK;
2182                 type = F_FLOCK;
2183                 if ((flags & FNONBLOCK) == 0)
2184                         type |= F_WAIT;
2185 #if CONFIG_MACF
2186                 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
2187                     F_SETLK, &lf);
2188                 if (error)
2189                         goto bad;
2190 #endif
2191                 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx)))
2192                         goto bad;
2193                 fp->f_fglob->fg_flag |= FHASLOCK;
2194         }
2195
2196         /* try to truncate by setting the size attribute */
2197         if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
2198                 goto bad;
2199
2200         /*
2201          * If the open flags denied the acquisition of a controlling tty,
2202          * clear the flag in the session structure that prevented the lower
2203          * level code from assigning one.
2204          */
2205         if (deny_controlling_tty) {
2206                 session_lock(sessp);
2207                 sessp->s_flags &= ~S_NOCTTY;
2208                 session_unlock(sessp);
2209         }
2210
2211         /*
2212          * If a controlling tty was set by the tty line discipline, then we
2213          * want to set the vp of the tty into the session structure.  We have
2214          * a race here because we can't get to the vp for the tp in ttyopen,
2215          * because it's not passed as a parameter in the open path.
2216          */
2217         if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
2218                 vnode_t ttyvp;
2219                 vnode_ref(vp);
2220                 session_lock(sessp);
2221                 ttyvp = sessp->s_ttyvp;
2222                 sessp->s_ttyvp = vp;
2223                 sessp->s_ttyvid = vnode_vid(vp);
2224                 session_unlock(sessp);
2225                 if (ttyvp != NULLVP)
2226                         vnode_rele(ttyvp);
2227         }
2228
2229         vnode_put(vp);
2230
2231         proc_fdlock(p);
2232         procfdtbl_releasefd(p, indx, NULL);
2233         fp_drop(p, indx, fp, 1);
2234         proc_fdunlock(p);
2235
2236         *retval = indx;
2237
2238         if (sessp != SESSION_NULL)
2239                 session_rele(sessp);
2240         return (0);
2241 bad:
2242         if (deny_controlling_tty) {
2243                 session_lock(sessp);
2244                 sessp->s_flags &= ~S_NOCTTY;
2245                 session_unlock(sessp);
2246         }
2247         if (sessp != SESSION_NULL)
2248                 session_rele(sessp);
2249
2250         /* Modify local copy (to not damage thread copy) */
2251         context.vc_ucred = fp->f_fglob->fg_cred;
2252
2253         vn_close(vp, fp->f_fglob->fg_flag, &context);
2254         vnode_put(vp);
2255         fp_free(p, indx, fp);
2256
2257         return (error);
2258
2259 }
2260
2261 /*
2262  * An open system call using an extended argument list compared to the regular
2263  * system call 'open'.
2264  *
2265  * Parameters:  p                       Process requesting the open
2266  *              uap                     User argument descriptor (see below)
2267  *              retval                  Pointer to an area to receive the
2268  *                                      return calue from the system call
2269  *
2270  * Indirect:    uap->path               Path to open (same as 'open')
2271  *              uap->flags              Flags to open (same as 'open'
2272  *              uap->uid                UID to set, if creating
2273  *              uap->gid                GID to set, if creating
2274  *              uap->mode               File mode, if creating (same as 'open')
2275  *              uap->xsecurity          ACL to set, if creating
2276  *
2277  * Returns:     0                       Success
2278  *              !0                      errno value
2279  *
2280  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
2281  *
2282  * XXX:         We should enummerate the possible errno values here, and where
2283  *              in the code they originated.
2284  */
2285 int
2286 open_extended(proc_t p, struct open_extended_args *uap, register_t *retval)
2287 {
2288         struct filedesc *fdp = p->p_fd;
2289         int ciferror;
2290         kauth_filesec_t xsecdst;
2291         struct vnode_attr va;
2292         struct nameidata nd;
2293         int cmode;
2294
2295         xsecdst = NULL;
2296         if ((uap->xsecurity != USER_ADDR_NULL) &&
2297             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
2298                 return ciferror;
2299
2300         VATTR_INIT(&va);
2301         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2302         VATTR_SET(&va, va_mode, cmode);
2303         if (uap->uid != KAUTH_UID_NONE)
2304                 VATTR_SET(&va, va_uid, uap->uid);
2305         if (uap->gid != KAUTH_GID_NONE)
2306                 VATTR_SET(&va, va_gid, uap->gid);
2307         if (xsecdst != NULL)
2308                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2309
2310         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2311
2312         ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
2313         if (xsecdst != NULL)
2314                 kauth_filesec_free(xsecdst);
2315
2316         return ciferror;
2317 }
2318
2319 int
2320 open(proc_t p, struct open_args *uap, register_t *retval)
2321 {
2322         __pthread_testcancel(1);
2323         return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
2324 }
2325
2326
2327 int
2328 open_nocancel(proc_t p, struct open_nocancel_args *uap, register_t *retval)
2329 {
2330         struct filedesc *fdp = p->p_fd;
2331         struct vnode_attr va;
2332         struct nameidata nd;
2333         int cmode;
2334
2335         VATTR_INIT(&va);
2336         /* Mask off all but regular access permissions */
2337         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2338         VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2339
2340         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2341
2342         return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
2343 }
2344
2345
2346 /*
2347  * Create a special file.
2348  */
2349 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
2350
2351 int
2352 mknod(proc_t p, struct mknod_args *uap, __unused register_t *retval)
2353 {
2354         struct vnode_attr va;
2355         vfs_context_t ctx = vfs_context_current();
2356         int error;
2357         int whiteout = 0;
2358         struct nameidata nd;
2359         vnode_t vp, dvp;
2360
2361         VATTR_INIT(&va);
2362         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2363         VATTR_SET(&va, va_rdev, uap->dev);
2364
2365         /* If it's a mknod() of a FIFO, call mkfifo1() instead */
2366         if ((uap->mode & S_IFMT) == S_IFIFO)
2367                 return(mkfifo1(ctx, uap->path, &va));
2368
2369         AUDIT_ARG(mode, uap->mode);
2370         AUDIT_ARG(dev, uap->dev);
2371
2372         if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
2373                 return (error);
2374         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2375                 UIO_USERSPACE, uap->path, ctx);
2376         error = namei(&nd);
2377         if (error)
2378                 return (error);
2379         dvp = nd.ni_dvp;
2380         vp = nd.ni_vp;
2381
2382         if (vp != NULL) {
2383                 error = EEXIST;
2384                 goto out;
2385         }
2386
2387         switch (uap->mode & S_IFMT) {
2388         case S_IFMT:    /* used by badsect to flag bad sectors */
2389                 VATTR_SET(&va, va_type, VBAD);
2390                 break;
2391         case S_IFCHR:
2392                 VATTR_SET(&va, va_type, VCHR);
2393                 break;
2394         case S_IFBLK:
2395                 VATTR_SET(&va, va_type, VBLK);
2396                 break;
2397         case S_IFWHT:
2398                 whiteout = 1;
2399                 break;
2400         default:
2401                 error = EINVAL;
2402                 goto out;
2403         }
2404
2405 #if CONFIG_MACF
2406         if (!whiteout) {
2407                 error = mac_vnode_check_create(ctx,
2408                     nd.ni_dvp, &nd.ni_cnd, &va);
2409                 if (error)
2410                         goto out;
2411         }
2412 #endif
2413
2414         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2415                 goto out;
2416
2417         if (whiteout) {
2418                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, ctx);
2419         } else {
2420                 error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
2421         }
2422         if (error)
2423                 goto out;
2424
2425         if (vp) {
2426                 int     update_flags = 0;
2427
2428                 // Make sure the name & parent pointers are hooked up
2429                 if (vp->v_name == NULL)
2430                         update_flags |= VNODE_UPDATE_NAME;
2431                 if (vp->v_parent == NULLVP)
2432                         update_flags |= VNODE_UPDATE_PARENT;
2433
2434                 if (update_flags)
2435                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
2436
2437 #if CONFIG_FSE
2438                 add_fsevent(FSE_CREATE_FILE, ctx,
2439                     FSE_ARG_VNODE, vp,
2440                     FSE_ARG_DONE);
2441 #endif
2442         }
2443
2444 out:
2445         /*
2446          * nameidone has to happen before we vnode_put(dvp)
2447          * since it may need to release the fs_nodelock on the dvp
2448          */
2449         nameidone(&nd);
2450
2451         if (vp)
2452                 vnode_put(vp);
2453         vnode_put(dvp);
2454
2455         return (error);
2456 }
2457
2458 /*
2459  * Create a named pipe.
2460  *
2461  * Returns:     0                       Success
2462  *              EEXIST
2463  *      namei:???
2464  *      vnode_authorize:???
2465  *      vn_create:???
2466  */
2467 static int
2468 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
2469 {
2470         vnode_t vp, dvp;
2471         int error;
2472         struct nameidata nd;
2473
2474         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2475                 UIO_USERSPACE, upath, ctx);
2476         error = namei(&nd);
2477         if (error)
2478                 return (error);
2479         dvp = nd.ni_dvp;
2480         vp = nd.ni_vp;
2481
2482         /* check that this is a new file and authorize addition */
2483         if (vp != NULL) {
2484                 error = EEXIST;
2485                 goto out;
2486         }
2487         VATTR_SET(vap, va_type, VFIFO);
2488
2489 #if CONFIG_MACF
2490         error = mac_vnode_check_create(ctx, nd.ni_dvp,
2491             &nd.ni_cnd, vap);
2492         if (error)
2493                 goto out;
2494 #endif
2495
2496
2497         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2498                 goto out;
2499
2500
2501         error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx);
2502 out:
2503         /*
2504          * nameidone has to happen before we vnode_put(dvp)
2505          * since it may need to release the fs_nodelock on the dvp
2506          */
2507         nameidone(&nd);
2508
2509         if (vp)
2510                 vnode_put(vp);
2511         vnode_put(dvp);
2512
2513         return error;
2514 }
2515
2516
2517 /*
2518  * A mkfifo system call using an extended argument list compared to the regular
2519  * system call 'mkfifo'.
2520  *
2521  * Parameters:  p                       Process requesting the open
2522  *              uap                     User argument descriptor (see below)
2523  *              retval                  (Ignored)
2524  *
2525  * Indirect:    uap->path               Path to fifo (same as 'mkfifo')
2526  *              uap->uid                UID to set
2527  *              uap->gid                GID to set
2528  *              uap->mode               File mode to set (same as 'mkfifo')
2529  *              uap->xsecurity          ACL to set, if creating
2530  *
2531  * Returns:     0                       Success
2532  *              !0                      errno value
2533  *
2534  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
2535  *
2536  * XXX:         We should enummerate the possible errno values here, and where
2537  *              in the code they originated.
2538  */
2539 int
2540 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused register_t *retval)
2541 {
2542         int ciferror;
2543         kauth_filesec_t xsecdst;
2544         struct vnode_attr va;
2545
2546         xsecdst = KAUTH_FILESEC_NONE;
2547         if (uap->xsecurity != USER_ADDR_NULL) {
2548                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
2549                         return ciferror;
2550         }
2551
2552         VATTR_INIT(&va);
2553         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2554         if (uap->uid != KAUTH_UID_NONE)
2555                 VATTR_SET(&va, va_uid, uap->uid);
2556         if (uap->gid != KAUTH_GID_NONE)
2557                 VATTR_SET(&va, va_gid, uap->gid);
2558         if (xsecdst != KAUTH_FILESEC_NONE)
2559                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2560
2561         ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
2562
2563         if (xsecdst != KAUTH_FILESEC_NONE)
2564                 kauth_filesec_free(xsecdst);
2565         return ciferror;
2566 }
2567
2568 /* ARGSUSED */
2569 int
2570 mkfifo(proc_t p, struct mkfifo_args *uap, __unused register_t *retval)
2571 {
2572         struct vnode_attr va;
2573
2574         VATTR_INIT(&va);
2575         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2576
2577         return(mkfifo1(vfs_context_current(), uap->path, &va));
2578 }
2579
2580 /*
2581  * Make a hard file link.
2582  *
2583  * Returns:     0                       Success
2584  *              EPERM
2585  *              EEXIST
2586  *              EXDEV
2587  *      namei:???
2588  *      vnode_authorize:???
2589  *      VNOP_LINK:???
2590  */
2591 /* ARGSUSED */
2592 int
2593 link(__unused proc_t p, struct link_args *uap, __unused register_t *retval)
2594 {
2595         vnode_t vp, dvp, lvp;
2596         struct nameidata nd;
2597         vfs_context_t ctx = vfs_context_current();
2598         int error;
2599         fse_info finfo;
2600         int need_event, has_listeners;
2601         char *target_path = NULL;
2602
2603         vp = dvp = lvp = NULLVP;
2604
2605         /* look up the object we are linking to */
2606         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2607                 UIO_USERSPACE, uap->path, ctx);
2608         error = namei(&nd);
2609         if (error)
2610                 return (error);
2611         vp = nd.ni_vp;
2612
2613         nameidone(&nd);
2614
2615         /*
2616          * Normally, linking to directories is not supported.
2617          * However, some file systems may have limited support.
2618          */
2619         if (vp->v_type == VDIR) {
2620                 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
2621                         error = EPERM;   /* POSIX */
2622                         goto out;
2623                 }
2624                 /* Linking to a directory requires ownership. */
2625                 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
2626                         struct vnode_attr dva;
2627
2628                         VATTR_INIT(&dva);
2629                         VATTR_WANTED(&dva, va_uid);
2630                         if (vnode_getattr(vp, &dva, ctx) != 0 ||
2631                             !VATTR_IS_SUPPORTED(&dva, va_uid) ||
2632                             (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
2633                                 error = EACCES;
2634                                 goto out;
2635                         }
2636                 }
2637         }
2638
2639         /* lookup the target node */
2640         nd.ni_cnd.cn_nameiop = CREATE;
2641         nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
2642         nd.ni_dirp = uap->link;
2643         error = namei(&nd);
2644         if (error != 0)
2645                 goto out;
2646         dvp = nd.ni_dvp;
2647         lvp = nd.ni_vp;
2648
2649 #if CONFIG_MACF
2650         if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
2651                 goto out2;
2652 #endif
2653
2654         /* or to anything that kauth doesn't want us to (eg. immutable items) */
2655         if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
2656                 goto out2;
2657
2658         /* target node must not exist */
2659         if (lvp != NULLVP) {
2660                 error = EEXIST;
2661                 goto out2;
2662         }
2663         /* cannot link across mountpoints */
2664         if (vnode_mount(vp) != vnode_mount(dvp)) {
2665                 error = EXDEV;
2666                 goto out2;
2667         }
2668
2669         /* authorize creation of the target note */
2670         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2671                 goto out2;
2672
2673         /* and finally make the link */
2674         error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
2675         if (error)
2676                 goto out2;
2677
2678 #if CONFIG_FSE
2679         need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2680 #else
2681         need_event = 0;
2682 #endif
2683         has_listeners = kauth_authorize_fileop_has_listeners();
2684
2685         if (need_event || has_listeners) {
2686                 char *link_to_path = NULL;
2687                 int len, link_name_len;
2688
2689                 /* build the path to the new link file */
2690                 GET_PATH(target_path);
2691                 if (target_path == NULL) {
2692                         error = ENOMEM;
2693                         goto out2;
2694                 }
2695
2696                 len = MAXPATHLEN;
2697                 vn_getpath(dvp, target_path, &len);
2698                 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
2699                     target_path[len-1] = '/';
2700                     strlcpy(&target_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
2701                     len += nd.ni_cnd.cn_namelen;
2702                 }
2703
2704                 if (has_listeners) {
2705                         /* build the path to file we are linking to */
2706                         GET_PATH(link_to_path);
2707                         if (link_to_path == NULL) {
2708                                 error = ENOMEM;
2709                                 goto out2;
2710                         }
2711
2712                         link_name_len = MAXPATHLEN;
2713                         vn_getpath(vp, link_to_path, &link_name_len);
2714
2715                         /*
2716                          * Call out to allow 3rd party notification of rename.
2717                          * Ignore result of kauth_authorize_fileop call.
2718                          */
2719                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
2720                                                (uintptr_t)link_to_path, (uintptr_t)target_path);
2721                         if (link_to_path != NULL) {
2722                                 RELEASE_PATH(link_to_path);
2723                         }
2724                 }
2725 #if CONFIG_FSE
2726                 if (need_event) {
2727                         /* construct fsevent */
2728                         if (get_fse_info(vp, &finfo, ctx) == 0) {
2729                                 // build the path to the destination of the link
2730                                 add_fsevent(FSE_CREATE_FILE, ctx,
2731                                             FSE_ARG_STRING, len, target_path,
2732                                             FSE_ARG_FINFO, &finfo,
2733                                             FSE_ARG_DONE);
2734                         }
2735                 }
2736 #endif
2737         }
2738 out2:
2739         /*
2740          * nameidone has to happen before we vnode_put(dvp)
2741          * since it may need to release the fs_nodelock on the dvp
2742          */
2743         nameidone(&nd);
2744         if (target_path != NULL) {
2745                 RELEASE_PATH(target_path);
2746         }
2747 out:
2748         if (lvp)
2749                 vnode_put(lvp);
2750         if (dvp)
2751                 vnode_put(dvp);
2752         vnode_put(vp);
2753         return (error);
2754 }
2755
2756 /*
2757  * Make a symbolic link.
2758  *
2759  * We could add support for ACLs here too...
2760  */
2761 /* ARGSUSED */
2762 int
2763 symlink(proc_t p, struct symlink_args *uap, __unused register_t *retval)
2764 {
2765         struct vnode_attr va;
2766         char *path;
2767         int error;
2768         struct nameidata nd;
2769         vfs_context_t ctx = vfs_context_current();
2770         vnode_t vp, dvp;
2771         size_t dummy=0;
2772
2773         MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
2774         error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
2775         if (error)
2776                 goto out;
2777         AUDIT_ARG(text, path);  /* This is the link string */
2778
2779         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2780                 UIO_USERSPACE, uap->link, ctx);
2781         error = namei(&nd);
2782         if (error)
2783                 goto out;
2784         dvp = nd.ni_dvp;
2785         vp = nd.ni_vp;
2786
2787         VATTR_INIT(&va);
2788         VATTR_SET(&va, va_type, VLNK);
2789         VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
2790 #if CONFIG_MACF
2791         error = mac_vnode_check_create(ctx,
2792                         dvp, &nd.ni_cnd, &va);
2793 #endif
2794         if (error != 0) {
2795             goto skipit;
2796         }
2797
2798         if (vp != NULL) {
2799             error = EEXIST;
2800             goto skipit;
2801         }
2802
2803         /* authorize */
2804         if (error == 0)
2805                 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
2806         /* get default ownership, etc. */
2807         if (error == 0)
2808                 error = vnode_authattr_new(dvp, &va, 0, ctx);
2809         if (error == 0)
2810                 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
2811
2812         /* do fallback attribute handling */
2813         if (error == 0)
2814                 error = vnode_setattr_fallback(vp, &va, ctx);
2815
2816         if (error == 0) {
2817                 int     update_flags = 0;
2818
2819                 if (vp == NULL) {
2820                         nd.ni_cnd.cn_nameiop = LOOKUP;
2821                         nd.ni_cnd.cn_flags = 0;
2822                         error = namei(&nd);
2823                         vp = nd.ni_vp;
2824
2825                         if (vp == NULL)
2826                                 goto skipit;
2827                 }
2828
2829 #if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
2830                 /* call out to allow 3rd party notification of rename.
2831                  * Ignore result of kauth_authorize_fileop call.
2832                  */
2833                 if (kauth_authorize_fileop_has_listeners() &&
2834                     namei(&nd) == 0) {
2835                         char *new_link_path = NULL;
2836                         int             len;
2837
2838                         /* build the path to the new link file */
2839                         new_link_path = get_pathbuff();
2840                         len = MAXPATHLEN;
2841                         vn_getpath(dvp, new_link_path, &len);
2842                         if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
2843                                 new_link_path[len - 1] = '/';
2844                                 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
2845                         }
2846
2847                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
2848                                            (uintptr_t)path, (uintptr_t)new_link_path);
2849                         if (new_link_path != NULL)
2850                                 release_pathbuff(new_link_path);
2851                 }
2852 #endif
2853                 // Make sure the name & parent pointers are hooked up
2854                 if (vp->v_name == NULL)
2855                         update_flags |= VNODE_UPDATE_NAME;
2856                 if (vp->v_parent == NULLVP)
2857                         update_flags |= VNODE_UPDATE_PARENT;
2858
2859                 if (update_flags)
2860                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
2861
2862 #if CONFIG_FSE
2863                 add_fsevent(FSE_CREATE_FILE, ctx,
2864                             FSE_ARG_VNODE, vp,
2865                             FSE_ARG_DONE);
2866 #endif
2867         }
2868
2869 skipit:
2870         /*
2871          * nameidone has to happen before we vnode_put(dvp)
2872          * since it may need to release the fs_nodelock on the dvp
2873          */
2874         nameidone(&nd);
2875
2876         if (vp)
2877                 vnode_put(vp);
2878         vnode_put(dvp);
2879 out:
2880         FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
2881
2882         return (error);
2883 }
2884
2885 /*
2886  * Delete a whiteout from the filesystem.
2887  */
2888 /* ARGSUSED */
2889 #warning XXX authorization not implmented for whiteouts
2890 int
2891 undelete(__unused proc_t p, struct undelete_args *uap, __unused register_t *retval)
2892 {
2893         int error;
2894         struct nameidata nd;
2895         vfs_context_t ctx = vfs_context_current();
2896         vnode_t vp, dvp;
2897
2898         NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1,
2899                 UIO_USERSPACE, uap->path, ctx);
2900         error = namei(&nd);
2901         if (error)
2902                 return (error);
2903         dvp = nd.ni_dvp;
2904         vp = nd.ni_vp;
2905
2906         if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2907                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
2908         } else
2909                 error = EEXIST;
2910
2911         /*
2912          * nameidone has to happen before we vnode_put(dvp)
2913          * since it may need to release the fs_nodelock on the dvp
2914          */
2915         nameidone(&nd);
2916
2917         if (vp)
2918                 vnode_put(vp);
2919         vnode_put(dvp);
2920
2921         return (error);
2922 }
2923
2924 /*
2925  * Delete a name from the filesystem.
2926  */
2927 /* ARGSUSED */
2928 int
2929 unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
2930 {
2931         vnode_t vp, dvp;
2932         int error;
2933         struct componentname *cnp;
2934         char  *path = NULL;
2935         int  len;
2936         fse_info  finfo;
2937         int flags = 0;
2938         int need_event = 0;
2939         int has_listeners = 0;
2940
2941 #if NAMEDRSRCFORK
2942         /* unlink or delete is allowed on rsrc forks and named streams */
2943         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
2944 #endif
2945
2946         ndp->ni_cnd.cn_flags |= LOCKPARENT;
2947         cnp = &ndp->ni_cnd;
2948
2949         error = namei(ndp);
2950         if (error)
2951                 return (error);
2952         dvp = ndp->ni_dvp;
2953         vp = ndp->ni_vp;
2954
2955         /* With Carbon delete semantics, busy files cannot be deleted */
2956         if (nodelbusy) {
2957                 flags |= VNODE_REMOVE_NODELETEBUSY;
2958         }
2959
2960         /*
2961          * Normally, unlinking of directories is not supported.
2962          * However, some file systems may have limited support.
2963          */
2964         if ((vp->v_type == VDIR) &&
2965             !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
2966                 error = EPERM;  /* POSIX */
2967         }
2968
2969         /*
2970          * The root of a mounted filesystem cannot be deleted.
2971          */
2972         if (vp->v_flag & VROOT) {
2973                 error = EBUSY;
2974         }
2975         if (error)
2976                 goto out;
2977
2978
2979         /* authorize the delete operation */
2980 #if CONFIG_MACF
2981         if (!error)
2982                 error = mac_vnode_check_unlink(ctx,
2983                     dvp, vp, cnp);
2984 #endif /* MAC */
2985         if (!error)
2986                 error = vnode_authorize(vp, ndp->ni_dvp, KAUTH_VNODE_DELETE, ctx);
2987         if (error)
2988                 goto out;
2989
2990 #if CONFIG_FSE
2991         need_event = need_fsevent(FSE_DELETE, dvp);
2992         if (need_event) {
2993                 if ((vp->v_flag & VISHARDLINK) == 0) {
2994                         get_fse_info(vp, &finfo, ctx);
2995                 }
2996         }
2997 #endif
2998         has_listeners = kauth_authorize_fileop_has_listeners();
2999         if (need_event || has_listeners) {
3000                 GET_PATH(path);
3001                 if (path == NULL) {
3002                         error = ENOMEM;
3003                         goto out;
3004                 }
3005                 len = MAXPATHLEN;
3006                 vn_getpath(vp, path, &len);
3007         }
3008
3009 #if NAMEDRSRCFORK
3010         if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
3011                 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
3012         else
3013 #endif
3014                 error = VNOP_REMOVE(dvp, vp, &ndp->ni_cnd, flags, ctx);
3015
3016         /*
3017          * Call out to allow 3rd party notification of delete.
3018          * Ignore result of kauth_authorize_fileop call.
3019          */
3020         if (!error) {
3021                 if (has_listeners) {
3022                         kauth_authorize_fileop(vfs_context_ucred(ctx),
3023                                 KAUTH_FILEOP_DELETE,
3024                                 (uintptr_t)vp,
3025                                 (uintptr_t)path);
3026                 }
3027
3028                 if (vp->v_flag & VISHARDLINK) {
3029                     //
3030                     // if a hardlink gets deleted we want to blow away the
3031                     // v_parent link because the path that got us to this
3032                     // instance of the link is no longer valid.  this will
3033                     // force the next call to get the path to ask the file
3034                     // system instead of just following the v_parent link.
3035                     //
3036                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
3037                 }
3038
3039 #if CONFIG_FSE
3040                 if (need_event) {
3041                         if (vp->v_flag & VISHARDLINK) {
3042                                 get_fse_info(vp, &finfo, ctx);
3043                         }
3044                         add_fsevent(FSE_DELETE, ctx,
3045                                                 FSE_ARG_STRING, len, path,
3046                                                 FSE_ARG_FINFO, &finfo,
3047                                                 FSE_ARG_DONE);
3048                 }
3049 #endif
3050         }
3051         if (path != NULL)
3052                 RELEASE_PATH(path);
3053
3054         /*
3055          * nameidone has to happen before we vnode_put(dvp)
3056          * since it may need to release the fs_nodelock on the dvp
3057          */
3058 out:
3059 #if NAMEDRSRCFORK
3060         /* recycle deleted rsrc fork to force reclaim on shadow file if necessary */
3061         if ((vnode_isnamedstream(ndp->ni_vp)) &&
3062                         (ndp->ni_vp->v_parent != NULLVP) &&
3063                         (vnode_isshadow(ndp->ni_vp))) {
3064                 vnode_recycle(ndp->ni_vp);
3065         }
3066 #endif
3067
3068         nameidone(ndp);
3069         vnode_put(dvp);
3070         vnode_put(vp);
3071         return (error);
3072 }
3073
3074 /*
3075  * Delete a name from the filesystem using POSIX semantics.
3076  */
3077 int
3078 unlink(__unused proc_t p, struct unlink_args *uap, __unused register_t *retval)
3079 {
3080         struct nameidata nd;
3081         vfs_context_t ctx = vfs_context_current();
3082
3083         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3084         return unlink1(ctx, &nd, 0);
3085 }
3086
3087 /*
3088  * Delete a name from the filesystem using Carbon semantics.
3089  */
3090 int
3091 delete(__unused proc_t p, struct delete_args *uap, __unused register_t *retval)
3092 {
3093         struct nameidata nd;
3094         vfs_context_t ctx = vfs_context_current();
3095
3096         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3097         return unlink1(ctx, &nd, 1);
3098 }
3099
3100 /*
3101  * Reposition read/write file offset.
3102  */
3103 int
3104 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
3105 {
3106         struct fileproc *fp;
3107         vnode_t vp;
3108         struct vfs_context *ctx;
3109         off_t offset = uap->offset, file_size;
3110         int error;
3111
3112         if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
3113                 if (error == ENOTSUP)
3114                         return (ESPIPE);
3115                 return (error);
3116         }
3117         if (vnode_isfifo(vp)) {
3118                 file_drop(uap->fd);
3119                 return(ESPIPE);
3120         }
3121
3122
3123         ctx = vfs_context_current();
3124 #if CONFIG_MACF
3125         if (uap->whence == L_INCR && uap->offset == 0)
3126                 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
3127                     fp->f_fglob);
3128         else
3129                 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
3130                     fp->f_fglob);
3131         if (error) {
3132                 file_drop(uap->fd);
3133                 return (error);
3134         }
3135 #endif
3136         if ( (error = vnode_getwithref(vp)) ) {
3137                 file_drop(uap->fd);
3138                 return(error);
3139         }
3140
3141         switch (uap->whence) {
3142         case L_INCR:
3143                 offset += fp->f_fglob->fg_offset;
3144                 break;
3145         case L_XTND:
3146                 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
3147                         break;
3148                 offset += file_size;
3149                 break;
3150         case L_SET:
3151                 break;
3152         default:
3153                 error = EINVAL;
3154         }
3155         if (error == 0) {
3156                 if (uap->offset > 0 && offset < 0) {
3157                         /* Incremented/relative move past max size */
3158                         error = EOVERFLOW;
3159                 } else {
3160                         /*
3161                          * Allow negative offsets on character devices, per
3162                          * POSIX 1003.1-2001.  Most likely for writing disk
3163                          * labels.
3164                          */
3165                         if (offset < 0 && vp->v_type != VCHR) {
3166                                 /* Decremented/relative move before start */
3167                                 error = EINVAL;
3168                         } else {
3169                                 /* Success */
3170                                 fp->f_fglob->fg_offset = offset;
3171                                 *retval = fp->f_fglob->fg_offset;
3172                         }
3173                 }
3174         }
3175         (void)vnode_put(vp);
3176         file_drop(uap->fd);
3177         return (error);
3178 }
3179
3180
3181 /*
3182  * Check access permissions.
3183  *
3184  * Returns:     0                       Success
3185  *              vnode_authorize:???
3186  */
3187 static int
3188 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
3189 {
3190         kauth_action_t action;
3191         int error;
3192
3193         /*
3194          * If just the regular access bits, convert them to something
3195          * that vnode_authorize will understand.
3196          */
3197         if (!(uflags & _ACCESS_EXTENDED_MASK)) {
3198                 action = 0;
3199                 if (uflags & R_OK)
3200                         action |= KAUTH_VNODE_READ_DATA;        /* aka KAUTH_VNODE_LIST_DIRECTORY */
3201                 if (uflags & W_OK) {
3202                         if (vnode_isdir(vp)) {
3203                                 action |= KAUTH_VNODE_ADD_FILE |
3204                                     KAUTH_VNODE_ADD_SUBDIRECTORY;
3205                                 /* might want delete rights here too */
3206                         } else {
3207                                 action |= KAUTH_VNODE_WRITE_DATA;
3208                         }
3209                 }
3210                 if (uflags & X_OK) {
3211                         if (vnode_isdir(vp)) {
3212                                 action |= KAUTH_VNODE_SEARCH;
3213                         } else {
3214                                 action |= KAUTH_VNODE_EXECUTE;
3215                         }
3216                 }
3217         } else {
3218                 /* take advantage of definition of uflags */
3219                 action = uflags >> 8;
3220         }
3221
3222 #if CONFIG_MACF
3223         error = mac_vnode_check_access(ctx, vp, uflags);
3224         if (error)
3225                 return (error);
3226 #endif /* MAC */
3227
3228         /* action == 0 means only check for existence */
3229         if (action != 0) {
3230                 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
3231         } else {
3232                 error = 0;
3233         }
3234
3235         return(error);
3236 }
3237
3238
3239
3240 /*
3241  * access_extended
3242  *
3243  * Description: uap->entries                    Pointer to argument descriptor
3244  *              uap->size                       Size of the area pointed to by
3245  *                                              the descriptor
3246  *              uap->results                    Pointer to the results array
3247  *
3248  * Returns:     0                       Success
3249  *              ENOMEM                  Insufficient memory
3250  *              EINVAL                  Invalid arguments
3251  *              namei:EFAULT            Bad address
3252  *              namei:ENAMETOOLONG      Filename too long
3253  *              namei:ENOENT            No such file or directory
3254  *              namei:ELOOP             Too many levels of symbolic links
3255  *              namei:EBADF             Bad file descriptor
3256  *              namei:ENOTDIR           Not a directory
3257  *              namei:???
3258  *              access1:
3259  *
3260  * Implicit returns:
3261  *              uap->results            Array contents modified
3262  *
3263  * Notes:       The uap->entries are structured as an arbitrary length array
3264  *              of accessx descriptors, followed by one or more NUL terminated
3265  *              strings
3266  *
3267  *                      struct accessx_descriptor[0]
3268  *                      ...
3269  *                      struct accessx_descriptor[n]
3270  *                      char name_data[0];
3271  *
3272  *              We determine the entry count by walking the buffer containing
3273  *              the uap->entries argument descriptor.  For each descriptor we
3274  *              see, the valid values for the offset ad_name_offset will be
3275  *              in the byte range:
3276  *
3277  *                      [ uap->entries + sizeof(struct accessx_descriptor) ]
3278  *                                              to
3279  *                              [ uap->entries + uap->size - 2 ]
3280  *
3281  *              since we must have at least one string, and the string must
3282  *              be at least one character plus the NUL terminator in length.
3283  *
3284  * XXX:         Need to support the check-as uid argument
3285  */
3286 int
3287 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused register_t *retval)
3288 {
3289         struct accessx_descriptor *input = NULL;
3290         errno_t *result = NULL;
3291         errno_t error = 0;
3292         int wantdelete = 0;
3293         unsigned int desc_max, desc_actual, i, j;
3294         struct vfs_context context;
3295         struct nameidata nd;
3296         int niopts;
3297         vnode_t vp = NULL;
3298         vnode_t dvp = NULL;
3299 #define ACCESSX_MAX_DESCR_ON_STACK 10
3300         struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
3301
3302         context.vc_ucred = NULL;
3303
3304         /*
3305          * Validate parameters; if valid, copy the descriptor array and string
3306          * arguments into local memory.  Before proceeding, the following
3307          * conditions must have been met:
3308          *
3309          * o    The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
3310          * o    There must be sufficient room in the request for at least one
3311          *      descriptor and a one yte NUL terminated string.
3312          * o    The allocation of local storage must not fail.
3313          */
3314         if (uap->size > ACCESSX_MAX_TABLESIZE)
3315                 return(ENOMEM);
3316         if (uap->size < (sizeof(struct accessx_descriptor) + 2))
3317                 return(EINVAL);
3318         if (uap->size <= sizeof (stack_input)) {
3319                 input = stack_input;
3320         } else {
3321         MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
3322         if (input == NULL) {
3323                 error = ENOMEM;
3324                 goto out;
3325         }
3326         }
3327         error = copyin(uap->entries, input, uap->size);
3328         if (error)
3329                 goto out;
3330
3331         /*
3332          * Force NUL termination of the copyin buffer to avoid nami() running
3333          * off the end.  If the caller passes us bogus data, they may get a
3334          * bogus result.
3335          */
3336         ((char *)input)[uap->size - 1] = 0;
3337
3338         /*
3339          * Access is defined as checking against the process' real identity,
3340          * even if operations are checking the effective identity.  This
3341          * requires that we use a local vfs context.
3342          */
3343         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3344         context.vc_thread = current_thread();
3345
3346         /*
3347          * Find out how many entries we have, so we can allocate the result
3348          * array by walking the list and adjusting the count downward by the
3349          * earliest string offset we see.
3350          */
3351         desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
3352         desc_actual = desc_max;
3353         for (i = 0; i < desc_actual; i++) {
3354                 /*
3355                  * Take the offset to the name string for this entry and
3356                  * convert to an input array index, which would be one off
3357                  * the end of the array if this entry was the lowest-addressed
3358                  * name string.
3359                  */
3360                 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
3361
3362                 /*
3363                  * An offset greater than the max allowable offset is an error.
3364                  * It is also an error for any valid entry to point
3365                  * to a location prior to the end of the current entry, if
3366                  * it's not a reference to the string of the previous entry.
3367                  */
3368                 if (j > desc_max || (j != 0 && j <= i)) {
3369                         error = EINVAL;
3370                         goto out;
3371                 }
3372
3373                 /*
3374                  * An offset of 0 means use the previous descriptor's offset;
3375                  * this is used to chain multiple requests for the same file
3376                  * to avoid multiple lookups.
3377                  */
3378                 if (j == 0) {
3379                         /* This is not valid for the first entry */
3380                         if (i == 0) {
3381                                 error = EINVAL;
3382                                 goto out;
3383                         }
3384                         continue;
3385                 }
3386
3387                 /*
3388                  * If the offset of the string for this descriptor is before
3389                  * what we believe is the current actual last descriptor,
3390                  * then we need to adjust our estimate downward; this permits
3391                  * the string table following the last descriptor to be out
3392                  * of order relative to the descriptor list.
3393                  */
3394                 if (j < desc_actual)
3395                         desc_actual = j;
3396         }
3397
3398         /*
3399          * We limit the actual number of descriptors we are willing to process
3400          * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
3401          * requested does not exceed this limit,
3402          */
3403         if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
3404                 error = ENOMEM;
3405                 goto out;
3406         }
3407         MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
3408         if (result == NULL) {
3409                 error = ENOMEM;
3410                 goto out;
3411         }
3412
3413         /*
3414          * Do the work by iterating over the descriptor entries we know to
3415          * at least appear to contain valid data.
3416          */
3417         error = 0;
3418         for (i = 0; i < desc_actual; i++) {
3419                 /*
3420                  * If the ad_name_offset is 0, then we use the previous
3421                  * results to make the check; otherwise, we are looking up
3422                  * a new file name.
3423                  */
3424                 if (input[i].ad_name_offset != 0) {
3425                         /* discard old vnodes */
3426                         if (vp) {
3427                                 vnode_put(vp);
3428                                 vp = NULL;
3429                         }
3430                         if (dvp) {
3431                                 vnode_put(dvp);
3432                                 dvp = NULL;
3433                         }
3434
3435                         /*
3436                          * Scan forward in the descriptor list to see if we
3437                          * need the parent vnode.  We will need it if we are
3438                          * deleting, since we must have rights  to remove
3439                          * entries in the parent directory, as well as the
3440                          * rights to delete the object itself.
3441                          */
3442                         wantdelete = input[i].ad_flags & _DELETE_OK;
3443                         for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
3444                                 if (input[j].ad_flags & _DELETE_OK)
3445                                         wantdelete = 1;
3446
3447                         niopts = FOLLOW | AUDITVNPATH1;
3448
3449                         /* need parent for vnode_authorize for deletion test */
3450                         if (wantdelete)
3451                                 niopts |= WANTPARENT;
3452
3453                         /* do the lookup */
3454                         NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset), &context);
3455                         error = namei(&nd);
3456                         if (!error) {
3457                                 vp = nd.ni_vp;
3458                                 if (wantdelete)
3459                                         dvp = nd.ni_dvp;
3460                         }
3461                         nameidone(&nd);
3462                 }
3463
3464                 /*
3465                  * Handle lookup errors.
3466                  */
3467                 switch(error) {
3468                 case ENOENT:
3469                 case EACCES:
3470                 case EPERM:
3471                 case ENOTDIR:
3472                         result[i] = error;
3473                         break;
3474                 case 0:
3475                         /* run this access check */
3476                         result[i] = access1(vp, dvp, input[i].ad_flags, &context);
3477                         break;
3478                 default:
3479                         /* fatal lookup error */
3480
3481                         goto out;
3482                 }
3483         }
3484
3485         /* copy out results */
3486         error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
3487
3488 out:
3489         if (input && input != stack_input)
3490                 FREE(input, M_TEMP);
3491         if (result)
3492                 FREE(result, M_TEMP);
3493         if (vp)
3494                 vnode_put(vp);
3495         if (dvp)
3496                 vnode_put(dvp);
3497         if (IS_VALID_CRED(context.vc_ucred))
3498                 kauth_cred_unref(&context.vc_ucred);
3499         return(error);
3500 }
3501
3502
3503 /*
3504  * Returns:     0                       Success
3505  *              namei:EFAULT            Bad address
3506  *              namei:ENAMETOOLONG      Filename too long
3507  *              namei:ENOENT            No such file or directory
3508  *              namei:ELOOP             Too many levels of symbolic links
3509  *              namei:EBADF             Bad file descriptor
3510  *              namei:ENOTDIR           Not a directory
3511  *              namei:???
3512  *              access1:
3513  */
3514 int
3515 access(__unused proc_t p, struct access_args *uap, __unused register_t *retval)
3516 {
3517         int error;
3518         struct nameidata nd;
3519         int niopts;
3520         struct vfs_context context;
3521
3522 #if NAMEDRSRCFORK
3523         int is_namedstream = 0;
3524 #endif
3525
3526         /*
3527          * Access is defined as checking against the process'
3528          * real identity, even if operations are checking the
3529          * effective identity.  So we need to tweak the credential
3530          * in the context.
3531          */
3532         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3533         context.vc_thread = current_thread();
3534
3535         niopts = FOLLOW | AUDITVNPATH1;
3536         /* need parent for vnode_authorize for deletion test */
3537         if (uap->flags & _DELETE_OK)
3538                 niopts |= WANTPARENT;
3539         NDINIT(&nd, LOOKUP, niopts, UIO_USERSPACE, uap->path, &context);
3540
3541 #if NAMEDRSRCFORK
3542         /* access(F_OK) calls are allowed for resource forks. */
3543         if (uap->flags == F_OK)
3544                 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3545 #endif
3546         error = namei(&nd);
3547         if (error)
3548                 goto out;
3549
3550 #if NAMEDRSRCFORK
3551         /* Grab reference on the shadow stream file vnode to
3552          * force an inactive on release which will mark it for
3553          * recycle
3554          */
3555         if (vnode_isnamedstream(nd.ni_vp) &&
3556                         (nd.ni_vp->v_parent != NULLVP) &&
3557                         (vnode_isshadow(nd.ni_vp))) {
3558                 is_namedstream = 1;
3559                 vnode_ref(nd.ni_vp);
3560         }
3561 #endif
3562
3563         error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
3564
3565 #if NAMEDRSRCFORK
3566         if (is_namedstream) {
3567                 vnode_rele(nd.ni_vp);
3568         }
3569 #endif
3570
3571         vnode_put(nd.ni_vp);
3572         if (uap->flags & _DELETE_OK)
3573                 vnode_put(nd.ni_dvp);
3574         nameidone(&nd);
3575
3576 out:
3577         kauth_cred_unref(&context.vc_ucred);
3578         return(error);
3579 }
3580
3581
3582 /*
3583  * Returns:     0                       Success
3584  *              EFAULT
3585  *      copyout:EFAULT
3586  *      namei:???
3587  *      vn_stat:???
3588  */
3589 static int
3590 stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3591 {
3592         struct stat sb;
3593         struct stat64 sb64;
3594         struct user_stat user_sb;
3595         struct user_stat64 user_sb64;
3596         caddr_t sbp;
3597         int error, my_size;
3598         kauth_filesec_t fsec;
3599         size_t xsecurity_bufsize;
3600         void * statptr;
3601
3602 #if NAMEDRSRCFORK
3603         int is_namedstream = 0;
3604         /* stat calls are allowed for resource forks. */
3605         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3606 #endif
3607         error = namei(ndp);
3608         if (error)
3609                 return (error);
3610         fsec = KAUTH_FILESEC_NONE;
3611         if (isstat64 != 0)
3612                 statptr  = (void *)&sb64;
3613         else
3614                 statptr  = (void *)&sb;
3615
3616 #if NAMEDRSRCFORK
3617         /* Grab reference on the shadow stream file vnode to
3618          * force an inactive on release which will mark it for
3619          * recycle.
3620          */
3621         if (vnode_isnamedstream(ndp->ni_vp) &&
3622                         (ndp->ni_vp->v_parent != NULLVP) &&
3623                         (vnode_isshadow(ndp->ni_vp))) {
3624                 is_namedstream = 1;
3625                 vnode_ref (ndp->ni_vp);
3626         }
3627 #endif
3628
3629         error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
3630
3631 #if NAMEDRSRCFORK
3632         if (is_namedstream) {
3633                 vnode_rele (ndp->ni_vp);
3634         }
3635 #endif
3636
3637         vnode_put(ndp->ni_vp);
3638         nameidone(ndp);
3639
3640         if (error)
3641                 return (error);
3642         /* Zap spare fields */
3643         if (isstat64 != 0) {
3644                 sb64.st_lspare = 0;
3645                 sb64.st_qspare[0] = 0LL;
3646                 sb64.st_qspare[1] = 0LL;
3647                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3648                         munge_stat64(&sb64, &user_sb64);
3649                         my_size = sizeof(user_sb64);
3650                         sbp = (caddr_t)&user_sb64;
3651                 } else {
3652                         my_size = sizeof(sb64);
3653                         sbp = (caddr_t)&sb64;
3654                 }
3655                 /*
3656                  * Check if we raced (post lookup) against the last unlink of a file.
3657                  */
3658                 if ((sb64.st_nlink == 0) && S_ISREG(sb64.st_mode)) {
3659                         sb64.st_nlink = 1;
3660                 }
3661         } else {
3662                 sb.st_lspare = 0;
3663                 sb.st_qspare[0] = 0LL;
3664                 sb.st_qspare[1] = 0LL;
3665                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3666                         munge_stat(&sb, &user_sb);
3667                         my_size = sizeof(user_sb);
3668                         sbp = (caddr_t)&user_sb;
3669                 } else {
3670                         my_size = sizeof(sb);
3671                         sbp = (caddr_t)&sb;
3672                 }
3673
3674                 /*
3675                  * Check if we raced (post lookup) against the last unlink of a file.
3676                  */
3677                 if ((sb.st_nlink == 0) && S_ISREG(sb.st_mode)) {
3678                         sb.st_nlink = 1;
3679                 }
3680         }
3681         if ((error = copyout(sbp, ub, my_size)) != 0)
3682                 goto out;
3683
3684         /* caller wants extended security information? */
3685         if (xsecurity != USER_ADDR_NULL) {
3686
3687                 /* did we get any? */
3688                 if (fsec == KAUTH_FILESEC_NONE) {
3689                         if (susize(xsecurity_size, 0) != 0) {
3690                                 error = EFAULT;
3691                                 goto out;
3692                         }
3693                 } else {
3694                         /* find the user buffer size */
3695                         xsecurity_bufsize = fusize(xsecurity_size);
3696
3697                         /* copy out the actual data size */
3698                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3699                                 error = EFAULT;
3700                                 goto out;
3701                         }
3702
3703                         /* if the caller supplied enough room, copy out to it */
3704                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
3705                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3706                 }
3707         }
3708 out:
3709         if (fsec != KAUTH_FILESEC_NONE)
3710                 kauth_filesec_free(fsec);
3711         return (error);
3712 }
3713
3714 /*
3715  * Get file status; this version follows links.
3716  *
3717  * Returns:     0                       Success
3718  *      stat2:???                       [see stat2() in this file]
3719  */
3720 static int
3721 stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3722 {
3723         struct nameidata nd;
3724         vfs_context_t ctx = vfs_context_current();
3725
3726         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
3727             UIO_USERSPACE, path, ctx);
3728         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
3729 }
3730
3731 int
3732 stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused register_t *retval)
3733 {
3734         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
3735 }
3736
3737 /*
3738  * Returns:     0                       Success
3739  *      stat1:???                       [see stat1() in this file]
3740  */
3741 int
3742 stat(__unused proc_t p, struct stat_args *uap, __unused register_t *retval)
3743 {
3744         return(stat1(uap->path, uap->ub, 0, 0, 0));
3745 }
3746
3747 int
3748 stat64(__unused proc_t p, struct stat64_args *uap, __unused register_t *retval)
3749 {
3750         return(stat1(uap->path, uap->ub, 0, 0, 1));
3751 }
3752
3753 int
3754 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused register_t *retval)
3755 {
3756         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
3757 }
3758 /*
3759  * Get file status; this version does not follow links.
3760  */
3761 static int
3762 lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3763 {
3764         struct nameidata nd;
3765         vfs_context_t ctx = vfs_context_current();
3766
3767         NDINIT(&nd, LOOKUP, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
3768             UIO_USERSPACE, path, ctx);
3769
3770         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
3771 }
3772
3773 int
3774 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused register_t *retval)
3775 {
3776         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
3777 }
3778
3779 int
3780 lstat(__unused proc_t p, struct lstat_args *uap, __unused register_t *retval)
3781 {
3782         return(lstat1(uap->path, uap->ub, 0, 0, 0));
3783 }
3784 int
3785 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused register_t *retval)
3786 {
3787         return(lstat1(uap->path, uap->ub, 0, 0, 1));
3788 }
3789
3790 int
3791 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused register_t *retval)
3792 {
3793         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
3794 }
3795
3796 /*
3797  * Get configurable pathname variables.
3798  *
3799  * Returns:     0                       Success
3800  *      namei:???
3801  *      vn_pathconf:???
3802  *
3803  * Notes:       Global implementation  constants are intended to be
3804  *              implemented in this function directly; all other constants
3805  *              are per-FS implementation, and therefore must be handled in
3806  *              each respective FS, instead.
3807  *
3808  * XXX We implement some things globally right now that should actually be
3809  * XXX per-FS; we will need to deal with this at some point.
3810  */
3811 /* ARGSUSED */
3812 int
3813 pathconf(__unused proc_t p, struct pathconf_args *uap, register_t *retval)
3814 {
3815         int error;
3816         struct nameidata nd;
3817         vfs_context_t ctx = vfs_context_current();
3818
3819         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
3820                 UIO_USERSPACE, uap->path, ctx);
3821         error = namei(&nd);
3822         if (error)
3823                 return (error);
3824
3825         error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
3826
3827         vnode_put(nd.ni_vp);
3828         nameidone(&nd);
3829         return (error);
3830 }
3831
3832 /*
3833  * Return target name of a symbolic link.
3834  */
3835 /* ARGSUSED */
3836 int
3837 readlink(proc_t p, struct readlink_args *uap, register_t *retval)
3838 {
3839         vnode_t vp;
3840         uio_t auio;
3841         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
3842         int error;
3843         struct nameidata nd;
3844         vfs_context_t ctx = vfs_context_current();
3845         char uio_buf[ UIO_SIZEOF(1) ];
3846
3847         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1,
3848                 UIO_USERSPACE, uap->path, ctx);
3849         error = namei(&nd);
3850         if (error)
3851                 return (error);
3852         vp = nd.ni_vp;
3853
3854         nameidone(&nd);
3855
3856         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
3857                                                                   &uio_buf[0], sizeof(uio_buf));
3858         uio_addiov(auio, uap->buf, uap->count);
3859         if (vp->v_type != VLNK)
3860                 error = EINVAL;
3861         else {
3862 #if CONFIG_MACF
3863                 error = mac_vnode_check_readlink(ctx,
3864                     vp);
3865 #endif
3866                 if (error == 0)
3867                         error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
3868                 if (error == 0)
3869                         error = VNOP_READLINK(vp, auio, ctx);
3870         }
3871         vnode_put(vp);
3872         // LP64todo - fix this
3873         *retval = uap->count - (int)uio_resid(auio);
3874         return (error);
3875 }
3876
3877 /*
3878  * Change file flags.
3879  */
3880 static int
3881 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
3882 {
3883         struct vnode_attr va;
3884         kauth_action_t action;
3885         int error;
3886
3887         VATTR_INIT(&va);
3888         VATTR_SET(&va, va_flags, flags);
3889
3890 #if CONFIG_MACF
3891         error = mac_vnode_check_setflags(ctx, vp, flags);
3892         if (error)
3893                 goto out;
3894 #endif
3895
3896         /* request authorisation, disregard immutability */
3897         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
3898                 goto out;
3899         /*
3900          * Request that the auth layer disregard those file flags it's allowed to when
3901          * authorizing this operation; we need to do this in order to be able to
3902          * clear immutable flags.
3903          */
3904         if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
3905                 goto out;
3906         error = vnode_setattr(vp, &va, ctx);
3907
3908         if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
3909                 error = ENOTSUP;
3910         }
3911 out:
3912         vnode_put(vp);
3913         return(error);
3914 }
3915
3916 /*
3917  * Change flags of a file given a path name.
3918  */
3919 /* ARGSUSED */
3920 int
3921 chflags(__unused proc_t p, struct chflags_args *uap, __unused register_t *retval)
3922 {
3923         vnode_t vp;
3924         vfs_context_t ctx = vfs_context_current();
3925         int error;
3926         struct nameidata nd;
3927
3928         AUDIT_ARG(fflags, uap->flags);
3929         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
3930                 UIO_USERSPACE, uap->path, ctx);
3931         error = namei(&nd);
3932         if (error)
3933                 return (error);
3934         vp = nd.ni_vp;
3935         nameidone(&nd);
3936
3937         error = chflags1(vp, uap->flags, ctx);
3938
3939         return(error);
3940 }
3941
3942 /*
3943  * Change flags of a file given a file descriptor.
3944  */
3945 /* ARGSUSED */
3946 int
3947 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused register_t *retval)
3948 {
3949         vnode_t vp;
3950         int error;
3951
3952         AUDIT_ARG(fd, uap->fd);
3953         AUDIT_ARG(fflags, uap->flags);
3954         if ( (error = file_vnode(uap->fd, &vp)) )
3955                 return (error);
3956
3957         if ((error = vnode_getwithref(vp))) {
3958                 file_drop(uap->fd);
3959                 return(error);
3960         }
3961
3962         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3963
3964         error = chflags1(vp, uap->flags, vfs_context_current());
3965
3966         file_drop(uap->fd);
3967         return (error);
3968 }
3969
3970 /*
3971  * Change security information on a filesystem object.
3972  *
3973  * Returns:     0                       Success
3974  *              EPERM                   Operation not permitted
3975  *              vnode_authattr:???      [anything vnode_authattr can return]
3976  *              vnode_authorize:???     [anything vnode_authorize can return]
3977  *              vnode_setattr:???       [anything vnode_setattr can return]
3978  *
3979  * Notes:       If vnode_authattr or vnode_authorize return EACCES, it will be
3980  *              translated to EPERM before being returned.
3981  */
3982 static int
3983 chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
3984 {
3985         kauth_action_t action;
3986         int error;
3987
3988         AUDIT_ARG(mode, (mode_t)vap->va_mode);
3989 #warning XXX audit new args
3990
3991 #if NAMEDSTREAMS
3992         /* chmod calls are not allowed for resource forks. */
3993         if (vp->v_flag & VISNAMEDSTREAM) {
3994                 return (EPERM);
3995         }
3996 #endif
3997
3998 #if CONFIG_MACF
3999         error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode);
4000         if (error)
4001                 return (error);
4002 #endif
4003
4004         /* make sure that the caller is allowed to set this security information */
4005         if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
4006             ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4007                 if (error == EACCES)
4008                         error = EPERM;
4009                 return(error);
4010         }
4011
4012         error = vnode_setattr(vp, vap, ctx);
4013
4014         return (error);
4015 }
4016
4017
4018 /*
4019  * Change mode of a file given path name.
4020  *
4021  * Returns:     0                       Success
4022  *              namei:???               [anything namei can return]
4023  *              chmod2:???              [anything chmod2 can return]
4024  */
4025 static int
4026 chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
4027 {
4028         struct nameidata nd;
4029         int error;
4030
4031         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4032                 UIO_USERSPACE, path, ctx);
4033         if ((error = namei(&nd)))
4034                 return (error);
4035         error = chmod2(ctx, nd.ni_vp, vap);
4036         vnode_put(nd.ni_vp);
4037         nameidone(&nd);
4038         return(error);
4039 }
4040
4041 /*
4042  * A chmod system call using an extended argument list compared to the regular
4043  * system call 'mkfifo'.
4044  *
4045  * Parameters:  p                       Process requesting the open
4046  *              uap                     User argument descriptor (see below)
4047  *              retval                  (ignored)
4048  *
4049  * Indirect:    uap->path               Path to object (same as 'chmod')
4050  *              uap->uid                UID to set
4051  *              uap->gid                GID to set
4052  *              uap->mode               File mode to set (same as 'chmod')
4053  *              uap->xsecurity          ACL to set (or delete)
4054  *
4055  * Returns:     0                       Success
4056  *              !0                      errno value
4057  *
4058  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
4059  *
4060  * XXX:         We should enummerate the possible errno values here, and where
4061  *              in the code they originated.
4062  */
4063 int
4064 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused register_t *retval)
4065 {
4066         int error;
4067         struct vnode_attr va;
4068         kauth_filesec_t xsecdst;
4069
4070         VATTR_INIT(&va);
4071         if (uap->mode != -1)
4072                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4073         if (uap->uid != KAUTH_UID_NONE)
4074                 VATTR_SET(&va, va_uid, uap->uid);
4075         if (uap->gid != KAUTH_GID_NONE)
4076                 VATTR_SET(&va, va_gid, uap->gid);
4077
4078         xsecdst = NULL;
4079         switch(uap->xsecurity) {
4080                 /* explicit remove request */
4081         case CAST_USER_ADDR_T((void *)1):       /* _FILESEC_REMOVE_ACL */
4082                 VATTR_SET(&va, va_acl, NULL);
4083                 break;
4084                 /* not being set */
4085         case USER_ADDR_NULL:
4086                 break;
4087         default:
4088                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4089                         return(error);
4090                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4091                 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
4092         }
4093
4094         error = chmod1(vfs_context_current(), uap->path, &va);
4095
4096         if (xsecdst != NULL)
4097                 kauth_filesec_free(xsecdst);
4098         return(error);
4099 }
4100
4101 /*
4102  * Returns:     0                       Success
4103  *              chmod1:???              [anything chmod1 can return]
4104  */
4105 int
4106 chmod(__unused proc_t p, struct chmod_args *uap, __unused register_t *retval)
4107 {
4108         struct vnode_attr va;
4109
4110         VATTR_INIT(&va);
4111         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4112
4113         return(chmod1(vfs_context_current(), uap->path, &va));
4114 }
4115
4116 /*
4117  * Change mode of a file given a file descriptor.
4118  */
4119 static int
4120 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
4121 {
4122         vnode_t vp;
4123         int error;
4124
4125         AUDIT_ARG(fd, fd);
4126
4127         if ((error = file_vnode(fd, &vp)) != 0)
4128                 return (error);
4129         if ((error = vnode_getwithref(vp)) != 0) {
4130                 file_drop(fd);
4131                 return(error);
4132         }
4133         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4134
4135         error = chmod2(vfs_context_current(), vp, vap);
4136         (void)vnode_put(vp);
4137         file_drop(fd);
4138
4139         return (error);
4140 }
4141
4142 int
4143 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused register_t *retval)
4144 {
4145         int error;
4146         struct vnode_attr va;
4147         kauth_filesec_t xsecdst;
4148
4149         VATTR_INIT(&va);
4150         if (uap->mode != -1)
4151                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4152         if (uap->uid != KAUTH_UID_NONE)
4153                 VATTR_SET(&va, va_uid, uap->uid);
4154         if (uap->gid != KAUTH_GID_NONE)
4155                 VATTR_SET(&va, va_gid, uap->gid);
4156
4157         xsecdst = NULL;
4158         switch(uap->xsecurity) {
4159         case USER_ADDR_NULL:
4160                 VATTR_SET(&va, va_acl, NULL);
4161                 break;
4162         case CAST_USER_ADDR_T(-1):
4163                 break;
4164         default:
4165                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4166                         return(error);
4167                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4168         }
4169
4170         error = fchmod1(p, uap->fd, &va);
4171
4172
4173         switch(uap->xsecurity) {
4174         case USER_ADDR_NULL:
4175         case CAST_USER_ADDR_T(-1):
4176                 break;
4177         default:
4178                 if (xsecdst != NULL)
4179                         kauth_filesec_free(xsecdst);
4180         }
4181         return(error);
4182 }
4183
4184 int
4185 fchmod(proc_t p, struct fchmod_args *uap, __unused register_t *retval)
4186 {
4187         struct vnode_attr va;
4188
4189         VATTR_INIT(&va);
4190         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4191
4192         return(fchmod1(p, uap->fd, &va));
4193 }
4194
4195
4196 /*
4197  * Set ownership given a path name.
4198  */
4199 /* ARGSUSED */
4200 static int
4201 chown1(vfs_context_t ctx, struct chown_args *uap, __unused register_t *retval, int follow)
4202 {
4203         vnode_t vp;
4204         struct vnode_attr va;
4205         int error;
4206         struct nameidata nd;
4207         kauth_action_t action;
4208
4209         AUDIT_ARG(owner, uap->uid, uap->gid);
4210
4211         NDINIT(&nd, LOOKUP, (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
4212                 UIO_USERSPACE, uap->path, ctx);
4213         error = namei(&nd);
4214         if (error)
4215                 return (error);
4216         vp = nd.ni_vp;
4217
4218         nameidone(&nd);
4219
4220         VATTR_INIT(&va);
4221         if (uap->uid != VNOVAL)
4222                 VATTR_SET(&va, va_uid, uap->uid);
4223         if (uap->gid != VNOVAL)
4224                 VATTR_SET(&va, va_gid, uap->gid);
4225
4226 #if CONFIG_MACF
4227         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4228         if (error)
4229                 goto out;
4230 #endif
4231
4232         /* preflight and authorize attribute changes */
4233         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4234                 goto out;
4235         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4236                 goto out;
4237         error = vnode_setattr(vp, &va, ctx);
4238
4239 out:
4240         /*
4241          * EACCES is only allowed from namei(); permissions failure should
4242          * return EPERM, so we need to translate the error code.
4243          */
4244         if (error == EACCES)
4245                 error = EPERM;
4246
4247         vnode_put(vp);
4248         return (error);
4249 }
4250
4251 int
4252 chown(__unused proc_t p, struct chown_args *uap, register_t *retval)
4253 {
4254         return chown1(vfs_context_current(), uap, retval, 1);
4255 }
4256
4257 int
4258 lchown(__unused proc_t p, struct lchown_args *uap, register_t *retval)
4259 {
4260         /* Argument list identical, but machine generated; cast for chown1() */
4261         return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
4262 }
4263
4264 /*
4265  * Set ownership given a file descriptor.
4266  */
4267 /* ARGSUSED */
4268 int
4269 fchown(__unused proc_t p, struct fchown_args *uap, __unused register_t *retval)
4270 {
4271         struct vnode_attr va;
4272         vfs_context_t ctx = vfs_context_current();
4273         vnode_t vp;
4274         int error;
4275         kauth_action_t action;
4276
4277         AUDIT_ARG(owner, uap->uid, uap->gid);
4278         AUDIT_ARG(fd, uap->fd);
4279
4280         if ( (error = file_vnode(uap->fd, &vp)) )
4281                 return (error);
4282
4283         if ( (error = vnode_getwithref(vp)) ) {
4284                 file_drop(uap->fd);
4285                 return(error);
4286         }
4287         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4288
4289         VATTR_INIT(&va);
4290         if (uap->uid != VNOVAL)
4291                 VATTR_SET(&va, va_uid, uap->uid);
4292         if (uap->gid != VNOVAL)
4293                 VATTR_SET(&va, va_gid, uap->gid);
4294
4295 #if NAMEDSTREAMS
4296         /* chown calls are not allowed for resource forks. */
4297         if (vp->v_flag & VISNAMEDSTREAM) {
4298                 error = EPERM;
4299                 goto out;
4300         }
4301 #endif
4302
4303 #if CONFIG_MACF
4304         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4305         if (error)
4306                 goto out;
4307 #endif
4308
4309         /* preflight and authorize attribute changes */
4310         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4311                 goto out;
4312         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4313                 if (error == EACCES)
4314                         error = EPERM;
4315                 goto out;
4316         }
4317         error = vnode_setattr(vp, &va, ctx);
4318
4319 out:
4320         (void)vnode_put(vp);
4321         file_drop(uap->fd);
4322         return (error);
4323 }
4324
4325 static int
4326 getutimes(user_addr_t usrtvp, struct timespec *tsp)
4327 {
4328         struct user_timeval tv[2];
4329         int error;
4330
4331         if (usrtvp == USER_ADDR_NULL) {
4332                 struct timeval old_tv;
4333                 /* XXX Y2038 bug because of microtime argument */
4334                 microtime(&old_tv);
4335                 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
4336                 tsp[1] = tsp[0];
4337         } else {
4338                 if (IS_64BIT_PROCESS(current_proc())) {
4339                         error = copyin(usrtvp, (void *)tv, sizeof(tv));
4340                 } else {
4341                         struct timeval old_tv[2];
4342                         error = copyin(usrtvp, (void *)old_tv, sizeof(old_tv));
4343                         tv[0].tv_sec = old_tv[0].tv_sec;
4344                         tv[0].tv_usec = old_tv[0].tv_usec;
4345                         tv[1].tv_sec = old_tv[1].tv_sec;
4346                         tv[1].tv_usec = old_tv[1].tv_usec;
4347                 }
4348                 if (error)
4349                         return (error);
4350                 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
4351                 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
4352         }
4353         return 0;
4354 }
4355
4356 static int
4357 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
4358         int nullflag)
4359 {
4360         int error;
4361         struct vnode_attr va;
4362         kauth_action_t action;
4363
4364         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4365
4366         VATTR_INIT(&va);
4367         VATTR_SET(&va, va_access_time, ts[0]);
4368         VATTR_SET(&va, va_modify_time, ts[1]);
4369         if (nullflag)
4370                 va.va_vaflags |= VA_UTIMES_NULL;
4371
4372 #if NAMEDSTREAMS
4373         /* utimes calls are not allowed for resource forks. */
4374         if (vp->v_flag & VISNAMEDSTREAM) {
4375                 error = EPERM;
4376                 goto out;
4377         }
4378 #endif
4379
4380 #if CONFIG_MACF
4381         error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
4382         if (error)
4383                 goto out;
4384 #endif
4385         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
4386                 if (!nullflag && error == EACCES)
4387                         error = EPERM;
4388                 goto out;
4389         }
4390
4391         /* since we may not need to auth anything, check here */
4392         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4393                 if (!nullflag && error == EACCES)
4394                         error = EPERM;
4395                 goto out;
4396         }
4397         error = vnode_setattr(vp, &va, ctx);
4398
4399 out:
4400         return error;
4401 }
4402
4403 /*
4404  * Set the access and modification times of a file.
4405  */
4406 /* ARGSUSED */
4407 int
4408 utimes(__unused proc_t p, struct utimes_args *uap, __unused register_t *retval)
4409 {
4410         struct timespec ts[2];
4411         user_addr_t usrtvp;
4412         int error;
4413         struct nameidata nd;
4414         vfs_context_t ctx = vfs_context_current();
4415
4416         /*
4417          * AUDIT: Needed to change the order of operations to do the
4418          * name lookup first because auditing wants the path.
4419          */
4420         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4421                 UIO_USERSPACE, uap->path, ctx);
4422         error = namei(&nd);
4423         if (error)
4424                 return (error);
4425         nameidone(&nd);
4426
4427         /*
4428          * Fetch the user-supplied time.  If usrtvp is USER_ADDR_NULL, we fetch
4429          * the current time instead.
4430          */
4431         usrtvp = uap->tptr;
4432         if ((error = getutimes(usrtvp, ts)) != 0)
4433                 goto out;
4434
4435         error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
4436
4437 out:
4438         vnode_put(nd.ni_vp);
4439         return (error);
4440 }
4441
4442 /*
4443  * Set the access and modification times of a file.
4444  */
4445 /* ARGSUSED */
4446 int
4447 futimes(__unused proc_t p, struct futimes_args *uap, __unused register_t *retval)
4448 {
4449         struct timespec ts[2];
4450         vnode_t vp;
4451         user_addr_t usrtvp;
4452         int error;
4453
4454         AUDIT_ARG(fd, uap->fd);
4455         usrtvp = uap->tptr;
4456         if ((error = getutimes(usrtvp, ts)) != 0)
4457                 return (error);
4458         if ((error = file_vnode(uap->fd, &vp)) != 0)
4459                 return (error);
4460         if((error = vnode_getwithref(vp))) {
4461                 file_drop(uap->fd);
4462                 return(error);
4463         }
4464
4465         error =  setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
4466         vnode_put(vp);
4467         file_drop(uap->fd);
4468         return(error);
4469 }
4470
4471 /*
4472  * Truncate a file given its path name.
4473  */
4474 /* ARGSUSED */
4475 int
4476 truncate(__unused proc_t p, struct truncate_args *uap, __unused register_t *retval)
4477 {
4478         vnode_t vp;
4479         struct vnode_attr va;
4480         vfs_context_t ctx = vfs_context_current();
4481         int error;
4482         struct nameidata nd;
4483         kauth_action_t action;
4484
4485         if (uap->length < 0)
4486                 return(EINVAL);
4487         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4488                 UIO_USERSPACE, uap->path, ctx);
4489         if ((error = namei(&nd)))
4490                 return (error);
4491         vp = nd.ni_vp;
4492
4493         nameidone(&nd);
4494
4495         VATTR_INIT(&va);
4496         VATTR_SET(&va, va_data_size, uap->length);
4497
4498 #if CONFIG_MACF
4499         error = mac_vnode_check_truncate(ctx, NOCRED, vp);
4500         if (error)
4501                 goto out;
4502 #endif
4503
4504         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4505                 goto out;
4506         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4507                 goto out;
4508         error = vnode_setattr(vp, &va, ctx);
4509 out:
4510         vnode_put(vp);
4511         return (error);
4512 }
4513
4514 /*
4515  * Truncate a file given a file descriptor.
4516  */
4517 /* ARGSUSED */
4518 int
4519 ftruncate(proc_t p, struct ftruncate_args *uap, register_t *retval)
4520 {
4521         vfs_context_t ctx = vfs_context_current();
4522         struct vnode_attr va;
4523         vnode_t vp;
4524         struct fileproc *fp;
4525         int error ;
4526         int fd = uap->fd;
4527
4528         AUDIT_ARG(fd, uap->fd);
4529         if (uap->length < 0)
4530                 return(EINVAL);
4531
4532         if ( (error = fp_lookup(p,fd,&fp,0)) ) {
4533                 return(error);
4534         }
4535
4536         if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
4537                 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
4538                 goto out;
4539         }
4540         if (fp->f_fglob->fg_type != DTYPE_VNODE)  {
4541                 error = EINVAL;
4542                 goto out;
4543         }
4544
4545         vp = (vnode_t)fp->f_fglob->fg_data;
4546
4547         if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
4548                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
4549                 error = EINVAL;
4550                 goto out;
4551         }
4552
4553         if ((error = vnode_getwithref(vp)) != 0) {
4554                 goto out;
4555         }
4556
4557         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4558
4559 #if CONFIG_MACF
4560         error = mac_vnode_check_truncate(ctx,
4561             fp->f_fglob->fg_cred, vp);
4562         if (error) {
4563                 (void)vnode_put(vp);
4564                 goto out;
4565         }
4566 #endif
4567         VATTR_INIT(&va);
4568         VATTR_SET(&va, va_data_size, uap->length);
4569         error = vnode_setattr(vp, &va, ctx);
4570         (void)vnode_put(vp);
4571 out:
4572         file_drop(fd);
4573         return (error);
4574 }
4575
4576
4577 /*
4578  * Sync an open file.
4579  */
4580 /* ARGSUSED */
4581 int
4582 fsync(proc_t p, struct fsync_args *uap, register_t *retval)
4583 {
4584         __pthread_testcancel(1);
4585         return(fsync_nocancel(p, (struct fsync_nocancel_args *)uap, retval));
4586 }
4587
4588 int
4589 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused register_t *retval)
4590 {
4591         vnode_t vp;
4592         struct fileproc *fp;
4593         vfs_context_t ctx = vfs_context_current();
4594         int error;
4595
4596         if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
4597                 return (error);
4598         if ( (error = vnode_getwithref(vp)) ) {
4599                 file_drop(uap->fd);
4600                 return(error);
4601         }
4602
4603         error = VNOP_FSYNC(vp, MNT_WAIT, ctx);
4604
4605 #if NAMEDRSRCFORK
4606         /* Sync resource fork shadow file if necessary. */
4607         if ((error == 0) &&
4608             (vp->v_flag & VISNAMEDSTREAM) &&
4609             (vp->v_parent != NULLVP) &&
4610             (vnode_isshadow(vp)) &&
4611             (fp->f_flags & FP_WRITTEN)) {
4612                 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
4613         }
4614 #endif
4615
4616         (void)vnode_put(vp);
4617         file_drop(uap->fd);
4618         return (error);
4619 }
4620
4621 /*
4622  * Duplicate files.  Source must be a file, target must be a file or
4623  * must not exist.
4624  *
4625  * XXX Copyfile authorisation checking is woefully inadequate, and will not
4626  *     perform inheritance correctly.
4627  */
4628 /* ARGSUSED */
4629 int
4630 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused register_t *retval)
4631 {
4632         vnode_t tvp, fvp, tdvp, sdvp;
4633         struct nameidata fromnd, tond;
4634         int error;
4635         vfs_context_t ctx = vfs_context_current();
4636
4637         /* Check that the flags are valid. */
4638
4639         if (uap->flags & ~CPF_MASK) {
4640                 return(EINVAL);
4641         }
4642
4643         NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1,
4644                 UIO_USERSPACE, uap->from, ctx);
4645         if ((error = namei(&fromnd)))
4646                 return (error);
4647         fvp = fromnd.ni_vp;
4648
4649         NDINIT(&tond, CREATE,  LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
4650             UIO_USERSPACE, uap->to, ctx);
4651         if ((error = namei(&tond))) {
4652                 goto out1;
4653         }
4654         tdvp = tond.ni_dvp;
4655         tvp = tond.ni_vp;
4656
4657         if (tvp != NULL) {
4658                 if (!(uap->flags & CPF_OVERWRITE)) {
4659                         error = EEXIST;
4660                         goto out;
4661                 }
4662         }
4663         if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
4664                 error = EISDIR;
4665                 goto out;
4666         }
4667
4668         if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4669                 goto out;
4670
4671         if (fvp == tdvp)
4672                 error = EINVAL;
4673         /*
4674          * If source is the same as the destination (that is the
4675          * same inode number) then there is nothing to do.
4676          * (fixed to have POSIX semantics - CSM 3/2/98)
4677          */
4678         if (fvp == tvp)
4679                 error = -1;
4680         if (!error)
4681                 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
4682 out:
4683         sdvp = tond.ni_startdir;
4684         /*
4685          * nameidone has to happen before we vnode_put(tdvp)
4686          * since it may need to release the fs_nodelock on the tdvp
4687          */
4688         nameidone(&tond);
4689
4690         if (tvp)
4691                 vnode_put(tvp);
4692         vnode_put(tdvp);
4693         vnode_put(sdvp);
4694 out1:
4695         vnode_put(fvp);
4696
4697         if (fromnd.ni_startdir)
4698                 vnode_put(fromnd.ni_startdir);
4699         nameidone(&fromnd);
4700
4701         if (error == -1)
4702                 return (0);
4703         return (error);
4704 }
4705
4706
4707 /*
4708  * Rename files.  Source and destination must either both be directories,
4709  * or both not be directories.  If target is a directory, it must be empty.
4710  */
4711 /* ARGSUSED */
4712 int
4713 rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval)
4714 {
4715         vnode_t tvp, tdvp;
4716         vnode_t fvp, fdvp;
4717         struct nameidata fromnd, tond;
4718         vfs_context_t ctx = vfs_context_current();
4719         int error;
4720         int do_retry;
4721         int mntrename;
4722         int need_event;
4723         const char *oname;
4724         char *from_name = NULL, *to_name = NULL;
4725         int from_len, to_len;
4726         int holding_mntlock;
4727         mount_t locked_mp = NULL;
4728         vnode_t oparent;
4729         fse_info from_finfo, to_finfo;
4730
4731         holding_mntlock = 0;
4732     do_retry = 0;
4733 retry:
4734         fvp = tvp = NULL;
4735         fdvp = tdvp = NULL;
4736         mntrename = FALSE;
4737
4738         NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx);
4739
4740         if ( (error = namei(&fromnd)) )
4741                 goto out1;
4742         fdvp = fromnd.ni_dvp;
4743         fvp  = fromnd.ni_vp;
4744
4745 #if CONFIG_MACF
4746         error = mac_vnode_check_rename_from(ctx, fdvp, fvp, &fromnd.ni_cnd);
4747         if (error)
4748                 goto out1;
4749 #endif
4750
4751         NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK , UIO_USERSPACE, uap->to, ctx);
4752         if (fvp->v_type == VDIR)
4753                 tond.ni_cnd.cn_flags |= WILLBEDIR;
4754
4755         if ( (error = namei(&tond)) ) {
4756                 /*
4757                  * Translate error code for rename("dir1", "dir2/.").
4758                  */
4759                 if (error == EISDIR && fvp->v_type == VDIR)
4760                         error = EINVAL;
4761                 goto out1;
4762         }
4763         tdvp = tond.ni_dvp;
4764         tvp  = tond.ni_vp;
4765
4766 #if CONFIG_MACF
4767         error = mac_vnode_check_rename_to(ctx,
4768             tdvp, tvp, fdvp == tdvp, &tond.ni_cnd);
4769         if (error)
4770                 goto out1;
4771 #endif
4772
4773         if (tvp != NULL) {
4774                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
4775                         error = ENOTDIR;
4776                         goto out1;
4777                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
4778                         error = EISDIR;
4779                         goto out1;
4780                 }
4781         }
4782         if (fvp == tdvp) {
4783                 error = EINVAL;
4784                 goto out1;
4785         }
4786         /*
4787          * If the source and destination are the same (i.e. they're
4788          * links to the same vnode) and the target file system is
4789          * case sensitive, then there is nothing to do.
4790          */
4791         if (fvp == tvp) {
4792                 int pathconf_val;
4793
4794                 /*
4795                  * Note: if _PC_CASE_SENSITIVE selector isn't supported,
4796                  * then assume that this file system is case sensitive.
4797                  */
4798                 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
4799                     pathconf_val != 0) {
4800                         goto out1;
4801                 }
4802         }
4803
4804         /*
4805          * Authorization.
4806          *
4807          * If tvp is a directory and not the same as fdvp, or tdvp is not
4808          * the same as fdvp, the node is moving between directories and we
4809          * need rights to remove from the old and add to the new.
4810          *
4811          * If tvp already exists and is not a directory, we need to be
4812          * allowed to delete it.
4813          *
4814          * Note that we do not inherit when renaming.
4815          *
4816          * XXX This needs to be revisited to implement the deferred-inherit bit
4817          */
4818         {
4819                 int moving = 0;
4820
4821                 error = 0;
4822                 if ((tvp != NULL) && vnode_isdir(tvp)) {
4823                         if (tvp != fdvp)
4824                                 moving = 1;
4825                 } else if (tdvp != fdvp) {
4826                         moving = 1;
4827                 }
4828                 /*
4829                  * must have delete rights to remove the old name even in
4830                  * the simple case of fdvp == tdvp.
4831                  *
4832                  * If fvp is a directory, and we are changing it's parent,
4833                  * then we also need rights to rewrite its ".." entry as well.
4834                  */
4835                 if (vnode_isdir(fvp)) {
4836                         if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
4837                                 goto auth_exit;
4838                 } else {
4839                 if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
4840                         goto auth_exit;
4841                 }
4842                 if (moving) {
4843                         /* moving into tdvp or tvp, must have rights to add */
4844                         if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
4845                                  NULL,
4846                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
4847                                  ctx)) != 0) {
4848                 /*
4849                  * We could encounter a race where after doing the namei, tvp stops
4850                  * being valid. If so, simply re-drive the rename call from the
4851                  * top.
4852                  */
4853                  if (error == ENOENT) {
4854                      do_retry = 1;
4855                  }
4856                                 goto auth_exit;
4857                         }
4858                 } else {
4859                         /* node staying in same directory, must be allowed to add new name */
4860                         if ((error = vnode_authorize(fdvp, NULL,
4861                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4862                                 goto auth_exit;
4863                 }
4864                 /* overwriting tvp */
4865                 if ((tvp != NULL) && !vnode_isdir(tvp) &&
4866                     ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
4867             /*
4868              * We could encounter a race where after doing the namei, tvp stops
4869              * being valid. If so, simply re-drive the rename call from the
4870              * top.
4871              */
4872             if (error == ENOENT) {
4873                 do_retry = 1;
4874             }
4875                         goto auth_exit;
4876                 }
4877
4878                 /* XXX more checks? */
4879
4880 auth_exit:
4881                 /* authorization denied */
4882                 if (error != 0)
4883                         goto out1;
4884         }
4885         /*
4886          * Allow the renaming of mount points.
4887          * - target must not exist
4888          * - target must reside in the same directory as source
4889          * - union mounts cannot be renamed
4890          * - "/" cannot be renamed
4891          */
4892         if ((fvp->v_flag & VROOT) &&
4893             (fvp->v_type == VDIR) &&
4894             (tvp == NULL)  &&
4895             (fvp->v_mountedhere == NULL)  &&
4896             (fdvp == tdvp)  &&
4897             ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
4898             (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
4899                 vnode_t coveredvp;
4900
4901                 /* switch fvp to the covered vnode */
4902                 coveredvp = fvp->v_mount->mnt_vnodecovered;
4903                 if ( (vnode_getwithref(coveredvp)) ) {
4904                         error = ENOENT;
4905                         goto out1;
4906                 }
4907                 vnode_put(fvp);
4908
4909                 fvp = coveredvp;
4910                 mntrename = TRUE;
4911         }
4912         /*
4913          * Check for cross-device rename.
4914          */
4915         if ((fvp->v_mount != tdvp->v_mount) ||
4916             (tvp && (fvp->v_mount != tvp->v_mount))) {
4917                 error = EXDEV;
4918                 goto out1;
4919         }
4920         /*
4921          * Avoid renaming "." and "..".
4922          */
4923         if (fvp->v_type == VDIR &&
4924             ((fdvp == fvp) ||
4925              (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') ||
4926              ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) {
4927                 error = EINVAL;
4928                 goto out1;
4929         }
4930         /*
4931          * The following edge case is caught here:
4932          * (to cannot be a descendent of from)
4933          *
4934          *       o fdvp
4935          *      /
4936          *     /
4937          *    o fvp
4938          *     \
4939          *      \
4940          *       o tdvp
4941          *      /
4942          *     /
4943          *    o tvp
4944          */
4945         if (tdvp->v_parent == fvp) {
4946                 error = EINVAL;
4947                 goto out1;
4948         }
4949
4950         /*
4951          * If source is the same as the destination (that is the
4952          * same inode number) then there is nothing to do...
4953          * EXCEPT if the underlying file system supports case
4954          * insensitivity and is case preserving.  In this case
4955          * the file system needs to handle the special case of
4956          * getting the same vnode as target (fvp) and source (tvp).
4957          *
4958          * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
4959          * and _PC_CASE_PRESERVING can have this exception, and they need to
4960          * handle the special case of getting the same vnode as target and
4961          * source.  NOTE: Then the target is unlocked going into vnop_rename,
4962          * so not to cause locking problems. There is a single reference on tvp.
4963          *
4964          * NOTE - that fvp == tvp also occurs if they are hard linked - NOTE
4965          * that correct behaviour then is just to remove the source (link)
4966          */
4967         if (fvp == tvp && fdvp == tdvp) {
4968                 if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
4969                     !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
4970                           fromnd.ni_cnd.cn_namelen)) {
4971                         goto out1;
4972                 }
4973         }
4974
4975         if (holding_mntlock && fvp->v_mount != locked_mp) {
4976                 /*
4977                  * we're holding a reference and lock
4978                  * on locked_mp, but it no longer matches
4979                  * what we want to do... so drop our hold
4980                  */
4981                 mount_unlock_renames(locked_mp);
4982                 mount_drop(locked_mp, 0);
4983                 holding_mntlock = 0;
4984         }
4985         if (tdvp != fdvp && fvp->v_type == VDIR) {
4986                 /*
4987                  * serialize renames that re-shape
4988                  * the tree... if holding_mntlock is
4989                  * set, then we're ready to go...
4990                  * otherwise we
4991                  * first need to drop the iocounts
4992                  * we picked up, second take the
4993                  * lock to serialize the access,
4994                  * then finally start the lookup
4995                  * process over with the lock held
4996                  */
4997                 if (!holding_mntlock) {
4998                         /*
4999                          * need to grab a reference on
5000                          * the mount point before we
5001                          * drop all the iocounts... once
5002                          * the iocounts are gone, the mount
5003                          * could follow
5004                          */
5005                         locked_mp = fvp->v_mount;
5006                         mount_ref(locked_mp, 0);
5007
5008                         /*
5009                          * nameidone has to happen before we vnode_put(tvp)
5010                          * since it may need to release the fs_nodelock on the tvp
5011                          */
5012                         nameidone(&tond);
5013
5014                         if (tvp)
5015                                 vnode_put(tvp);
5016                         vnode_put(tdvp);
5017
5018                         /*
5019                          * nameidone has to happen before we vnode_put(fdvp)
5020                          * since it may need to release the fs_nodelock on the fvp
5021                          */
5022                         nameidone(&fromnd);
5023
5024                         vnode_put(fvp);
5025                         vnode_put(fdvp);
5026
5027                         mount_lock_renames(locked_mp);
5028                         holding_mntlock = 1;
5029
5030                         goto retry;
5031                 }
5032         } else {
5033                 /*
5034                  * when we dropped the iocounts to take
5035                  * the lock, we allowed the identity of
5036                  * the various vnodes to change... if they did,
5037                  * we may no longer be dealing with a rename
5038                  * that reshapes the tree... once we're holding
5039                  * the iocounts, the vnodes can't change type
5040                  * so we're free to drop the lock at this point
5041                  * and continue on
5042                  */
5043                 if (holding_mntlock) {
5044                         mount_unlock_renames(locked_mp);
5045                         mount_drop(locked_mp, 0);
5046                         holding_mntlock = 0;
5047                 }
5048         }
5049         // save these off so we can later verify that fvp is the same
5050         oname   = fvp->v_name;
5051         oparent = fvp->v_parent;
5052
5053 #if CONFIG_FSE
5054         need_event = need_fsevent(FSE_RENAME, fvp);
5055         if (need_event) {
5056                 get_fse_info(fvp, &from_finfo, ctx);
5057
5058                 if (tvp) {
5059                         get_fse_info(tvp, &to_finfo, ctx);
5060                 }
5061         }
5062 #else
5063         need_event = 0;
5064 #endif /* CONFIG_FSE */
5065
5066         if (need_event || kauth_authorize_fileop_has_listeners()) {
5067                 GET_PATH(from_name);
5068                 if (from_name == NULL) {
5069                         error = ENOMEM;
5070                         goto out1;
5071                 }
5072                 from_len = MAXPATHLEN;
5073                 vn_getpath(fdvp, from_name, &from_len);
5074                 if ((from_len + 1 + fromnd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
5075                     if (from_len > 2) {
5076                         from_name[from_len-1] = '/';
5077                     } else {
5078                         from_len--;
5079                     }
5080                     strlcpy(&from_name[from_len], fromnd.ni_cnd.cn_nameptr, MAXPATHLEN-from_len);
5081                     from_len += fromnd.ni_cnd.cn_namelen + 1;
5082                     from_name[from_len] = '\0';
5083                 }
5084
5085                 GET_PATH(to_name);
5086                 if (to_name == NULL) {
5087                         error = ENOMEM;
5088                         goto out1;
5089                 }
5090
5091                 to_len = MAXPATHLEN;
5092                 vn_getpath(tdvp, to_name, &to_len);
5093                 // if the path is not just "/", then append a "/"
5094                 if ((to_len + 1 + tond.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
5095                     if (to_len > 2) {
5096                         to_name[to_len-1] = '/';
5097                     } else {
5098                         to_len--;
5099                     }
5100                     strlcpy(&to_name[to_len], tond.ni_cnd.cn_nameptr, MAXPATHLEN-to_len);
5101                     to_len += tond.ni_cnd.cn_namelen + 1;
5102                     to_name[to_len] = '\0';
5103                 }
5104         }
5105
5106         error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd,
5107                             tdvp, tvp, &tond.ni_cnd,
5108                             ctx);
5109
5110         if (holding_mntlock) {
5111                 /*
5112                  * we can drop our serialization
5113                  * lock now
5114                  */
5115                 mount_unlock_renames(locked_mp);
5116                 mount_drop(locked_mp, 0);
5117                 holding_mntlock = 0;
5118         }
5119         if (error) {
5120         /*
5121          * We may encounter a race in the VNOP where the destination didn't
5122          * exist when we did the namei, but it does by the time we go and
5123                  * try to create the entry. In this case, we should re-drive this rename
5124                  * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
5125                  * but other filesystem susceptible to this race could return it, too.
5126                  */
5127         if (error == ERECYCLE) {
5128             do_retry = 1;
5129         }
5130
5131                 goto out1;
5132         }
5133
5134         /* call out to allow 3rd party notification of rename.
5135          * Ignore result of kauth_authorize_fileop call.
5136          */
5137         kauth_authorize_fileop(vfs_context_ucred(ctx),
5138                         KAUTH_FILEOP_RENAME,
5139                         (uintptr_t)from_name, (uintptr_t)to_name);
5140
5141 #if CONFIG_FSE
5142         if (from_name != NULL && to_name != NULL) {
5143                 if (tvp) {
5144                         add_fsevent(FSE_RENAME, ctx,
5145                                     FSE_ARG_STRING, from_len, from_name,
5146                                     FSE_ARG_FINFO, &from_finfo,
5147                                     FSE_ARG_STRING, to_len, to_name,
5148                                     FSE_ARG_FINFO, &to_finfo,
5149                                     FSE_ARG_DONE);
5150                 } else {
5151                         add_fsevent(FSE_RENAME, ctx,
5152                                     FSE_ARG_STRING, from_len, from_name,
5153                                     FSE_ARG_FINFO, &from_finfo,
5154                                     FSE_ARG_STRING, to_len, to_name,
5155                                     FSE_ARG_DONE);
5156                 }
5157         }
5158 #endif /* CONFIG_FSE */
5159
5160         /*
5161          * update filesystem's mount point data
5162          */
5163         if (mntrename) {
5164                 char *cp, *pathend, *mpname;
5165                 char * tobuf;
5166                 struct mount *mp;
5167                 int maxlen;
5168                 size_t len = 0;
5169
5170                 mp = fvp->v_mountedhere;
5171
5172                 if (vfs_busy(mp, LK_NOWAIT)) {
5173                         error = EBUSY;
5174                         goto out1;
5175                 }
5176                 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
5177
5178                 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
5179                 if (!error) {
5180                         /* find current mount point prefix */
5181                         pathend = &mp->mnt_vfsstat.f_mntonname[0];
5182                         for (cp = pathend; *cp != '\0'; ++cp) {
5183                                 if (*cp == '/')
5184                                         pathend = cp + 1;
5185                         }
5186                         /* find last component of target name */
5187                         for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
5188                                 if (*cp == '/')
5189                                         mpname = cp + 1;
5190                         }
5191                         /* append name to prefix */
5192                         maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
5193                         bzero(pathend, maxlen);
5194                         strlcpy(pathend, mpname, maxlen);
5195                 }
5196                 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
5197
5198                 vfs_unbusy(mp);
5199         }
5200         /*
5201          * fix up name & parent pointers.  note that we first
5202          * check that fvp has the same name/parent pointers it
5203          * had before the rename call... this is a 'weak' check
5204          * at best...
5205          */
5206         if (oname == fvp->v_name && oparent == fvp->v_parent) {
5207                 int update_flags;
5208
5209                 update_flags = VNODE_UPDATE_NAME;
5210
5211                 if (fdvp != tdvp)
5212                         update_flags |= VNODE_UPDATE_PARENT;
5213
5214                 vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
5215         }
5216 out1:
5217         if (to_name != NULL) {
5218                 RELEASE_PATH(to_name);
5219                 to_name = NULL;
5220         }
5221         if (from_name != NULL) {
5222                 RELEASE_PATH(from_name);
5223                 from_name = NULL;
5224         }
5225         if (holding_mntlock) {
5226                 mount_unlock_renames(locked_mp);
5227                 mount_drop(locked_mp, 0);
5228                 holding_mntlock = 0;
5229         }
5230         if (tdvp) {
5231                 /*
5232                  * nameidone has to happen before we vnode_put(tdvp)
5233                  * since it may need to release the fs_nodelock on the tdvp
5234                  */
5235                 nameidone(&tond);
5236
5237                 if (tvp)
5238                         vnode_put(tvp);
5239                 vnode_put(tdvp);
5240         }
5241         if (fdvp) {
5242                 /*
5243                  * nameidone has to happen before we vnode_put(fdvp)
5244                  * since it may need to release the fs_nodelock on the fdvp
5245                  */
5246                 nameidone(&fromnd);
5247
5248                 if (fvp)
5249                         vnode_put(fvp);
5250                 vnode_put(fdvp);
5251         }
5252
5253     /*
5254      * If things changed after we did the namei, then we will re-drive
5255      * this rename call from the top.
5256      */
5257         if(do_retry) {
5258         do_retry = 0;
5259                 goto retry;
5260         }
5261
5262         return (error);
5263 }
5264
5265 /*
5266  * Make a directory file.
5267  *
5268  * Returns:     0                       Success
5269  *              EEXIST
5270  *      namei:???
5271  *      vnode_authorize:???
5272  *      vn_create:???
5273  */
5274 /* ARGSUSED */
5275 static int
5276 mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
5277 {
5278         vnode_t vp, dvp;
5279         int error;
5280         int update_flags = 0;
5281         struct nameidata nd;
5282
5283         AUDIT_ARG(mode, vap->va_mode);
5284         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
5285                 UIO_USERSPACE, path, ctx);
5286         nd.ni_cnd.cn_flags |= WILLBEDIR;
5287         error = namei(&nd);
5288         if (error)
5289                 return (error);
5290         dvp = nd.ni_dvp;
5291         vp = nd.ni_vp;
5292
5293         if (vp != NULL) {
5294                 error = EEXIST;
5295                 goto out;
5296         }
5297
5298         VATTR_SET(vap, va_type, VDIR);
5299
5300 #if CONFIG_MACF
5301         error = mac_vnode_check_create(ctx,
5302             nd.ni_dvp, &nd.ni_cnd, vap);
5303         if (error)
5304                 goto out;
5305 #endif
5306
5307         /* authorize addition of a directory to the parent */
5308         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
5309                 goto out;
5310
5311
5312         /* make the directory */
5313         if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0)
5314                 goto out;
5315
5316         // Make sure the name & parent pointers are hooked up
5317         if (vp->v_name == NULL)
5318                 update_flags |= VNODE_UPDATE_NAME;
5319         if (vp->v_parent == NULLVP)
5320                 update_flags |= VNODE_UPDATE_PARENT;
5321
5322         if (update_flags)
5323                 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
5324
5325 #if CONFIG_FSE
5326         add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
5327 #endif
5328
5329 out:
5330         /*
5331          * nameidone has to happen before we vnode_put(dvp)
5332          * since it may need to release the fs_nodelock on the dvp
5333          */
5334         nameidone(&nd);
5335
5336         if (vp)
5337                 vnode_put(vp);
5338         vnode_put(dvp);
5339
5340         return (error);
5341 }
5342
5343
5344 int
5345 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused register_t *retval)
5346 {
5347         int ciferror;
5348         kauth_filesec_t xsecdst;
5349         struct vnode_attr va;
5350
5351         xsecdst = NULL;
5352         if ((uap->xsecurity != USER_ADDR_NULL) &&
5353             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
5354                 return ciferror;
5355
5356         VATTR_INIT(&va);
5357         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5358         if (xsecdst != NULL)
5359                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5360
5361         ciferror = mkdir1(vfs_context_current(), uap->path, &va);
5362         if (xsecdst != NULL)
5363                 kauth_filesec_free(xsecdst);
5364         return ciferror;
5365 }
5366
5367 int
5368 mkdir(proc_t p, struct mkdir_args *uap, __unused register_t *retval)
5369 {
5370         struct vnode_attr va;
5371
5372         VATTR_INIT(&va);
5373         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5374
5375         return(mkdir1(vfs_context_current(), uap->path, &va));
5376 }
5377
5378 /*
5379  * Remove a directory file.
5380  */
5381 /* ARGSUSED */
5382 int
5383 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused register_t *retval)
5384 {
5385         vnode_t vp, dvp;
5386         int error;
5387         struct nameidata nd;
5388         vfs_context_t ctx = vfs_context_current();
5389
5390         int restart_flag, oldvp_id = -1;
5391
5392         /*
5393          * This loop exists to restart rmdir in the unlikely case that two
5394          * processes are simultaneously trying to remove the same directory
5395          * containing orphaned appleDouble files.
5396          */
5397         do {
5398                 restart_flag = 0;
5399
5400                 NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1,
5401                                 UIO_USERSPACE, uap->path, ctx);
5402                 error = namei(&nd);
5403                 if (error)
5404                         return (error);
5405
5406                 dvp = nd.ni_dvp;
5407                 vp = nd.ni_vp;
5408
5409
5410                 /*
5411                  * If being restarted check if the new vp
5412                  * still has the same v_id.
5413                  */
5414                 if (oldvp_id != -1 && oldvp_id != vp->v_id) {
5415                         error = ENOENT;
5416                         goto out;
5417                 }
5418
5419                 if (vp->v_type != VDIR) {
5420                         /*
5421                          * rmdir only deals with directories
5422                          */
5423                         error = ENOTDIR;
5424                 } else if (dvp == vp) {
5425                         /*
5426                          * No rmdir "." please.
5427                          */
5428                         error = EINVAL;
5429                 } else if (vp->v_flag & VROOT) {
5430                         /*
5431                          * The root of a mounted filesystem cannot be deleted.
5432                          */
5433                         error = EBUSY;
5434                 } else {
5435 #if CONFIG_MACF
5436                         error = mac_vnode_check_unlink(ctx, dvp,
5437                                         vp, &nd.ni_cnd);
5438                         if (!error)
5439 #endif
5440                                 error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, ctx);
5441                 }
5442                 if (!error) {
5443                         char     *path = NULL;
5444                         int       len;
5445                         fse_info  finfo;
5446                         int has_listeners = 0;
5447                         int need_event = 0;
5448
5449 #if CONFIG_FSE
5450                         need_event = need_fsevent(FSE_DELETE, dvp);
5451                         if (need_event) {
5452                                 get_fse_info(vp, &finfo, ctx);
5453                         }
5454 #endif
5455                         has_listeners = kauth_authorize_fileop_has_listeners();
5456                         if (need_event || has_listeners) {
5457                                 GET_PATH(path);
5458                                 if (path == NULL) {
5459                                         error = ENOMEM;
5460                                         goto out;
5461                                 }
5462                                 len = MAXPATHLEN;
5463                                 vn_getpath(vp, path, &len);
5464                         }
5465
5466                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5467
5468                         /*
5469                          * Special case to remove orphaned AppleDouble
5470                          * files. I don't like putting this in the kernel,
5471                          * but carbon does not like putting this in carbon either,
5472                          * so here we are.
5473                          */
5474                         if (error == ENOTEMPTY) {
5475                                 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
5476                                 if (error == EBUSY) {
5477                                         oldvp_id = vp->v_id;
5478                                         goto out;
5479                                 }
5480
5481
5482                                 /*
5483                                  * Assuming everything went well, we will try the RMDIR again
5484                                  */
5485                                 if (!error)
5486                                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5487                         }
5488
5489                         /*
5490                          * Call out to allow 3rd party notification of delete.
5491                          * Ignore result of kauth_authorize_fileop call.
5492                          */
5493                         if (!error) {
5494                                 if (has_listeners) {
5495                                         kauth_authorize_fileop(vfs_context_ucred(ctx),
5496                                                         KAUTH_FILEOP_DELETE,
5497                                                         (uintptr_t)vp,
5498                                                         (uintptr_t)path);
5499                                 }
5500
5501                                 if (vp->v_flag & VISHARDLINK) {
5502                                     // see the comment in unlink1() about why we update
5503                                     // the parent of a hard link when it is removed
5504                                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
5505                                 }
5506
5507 #if CONFIG_FSE
5508                                 if (need_event) {
5509                                         add_fsevent(FSE_DELETE, ctx,
5510                                                         FSE_ARG_STRING, len, path,
5511                                                         FSE_ARG_FINFO, &finfo,
5512                                                         FSE_ARG_DONE);
5513                                 }
5514 #endif
5515                         }
5516                         if (path != NULL)
5517                                 RELEASE_PATH(path);
5518                 }
5519
5520 out:
5521                 /*
5522                  * nameidone has to happen before we vnode_put(dvp)
5523                  * since it may need to release the fs_nodelock on the dvp
5524                  */
5525                 nameidone(&nd);
5526
5527                 vnode_put(dvp);
5528                 vnode_put(vp);
5529
5530                 if (restart_flag == 0) {
5531                         wakeup_one((caddr_t)vp);
5532                         return (error);
5533                 }
5534                 tsleep(vp, PVFS, "rm AD", 1);
5535
5536         } while (restart_flag != 0);
5537
5538         return (error);
5539
5540 }
5541
5542 /* Get direntry length padded to 8 byte alignment */
5543 #define DIRENT64_LEN(namlen) \
5544         ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
5545
5546 static errno_t
5547 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
5548                 int *numdirent, vfs_context_t ctxp)
5549 {
5550         /* Check if fs natively supports VNODE_READDIR_EXTENDED */
5551         if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) {
5552                 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
5553         } else {
5554                 size_t bufsize;
5555                 void * bufptr;
5556                 uio_t auio;
5557                 struct direntry entry64;
5558                 struct dirent *dep;
5559                 int bytesread;
5560                 int error;
5561
5562                 /*
5563                  * Our kernel buffer needs to be smaller since re-packing
5564                  * will expand each dirent.  The worse case (when the name
5565                  * length is 3) corresponds to a struct direntry size of 32
5566                  * bytes (8-byte aligned) and a struct dirent size of 12 bytes
5567                  * (4-byte aligned).  So having a buffer that is 3/8 the size
5568                  * will prevent us from reading more than we can pack.
5569                  *
5570                  * Since this buffer is wired memory, we will limit the
5571                  * buffer size to a maximum of 32K. We would really like to
5572                  * use 32K in the MIN(), but we use magic number 87371 to
5573                  * prevent uio_resid() * 3 / 8 from overflowing.
5574                  */
5575                 bufsize = 3 * MIN(uio_resid(uio), 87371) / 8;
5576                 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
5577
5578                 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
5579                 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
5580                 auio->uio_offset = uio->uio_offset;
5581
5582                 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
5583
5584                 dep = (struct dirent *)bufptr;
5585                 bytesread = bufsize - uio_resid(auio);
5586
5587                 /*
5588                  * Convert all the entries and copy them out to user's buffer.
5589                  */
5590                 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
5591                         /* Convert a dirent to a dirent64. */
5592                         entry64.d_ino = dep->d_ino;
5593                         entry64.d_seekoff = 0;
5594                         entry64.d_reclen = DIRENT64_LEN(dep->d_namlen);
5595                         entry64.d_namlen = dep->d_namlen;
5596                         entry64.d_type = dep->d_type;
5597                         bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1);
5598
5599                         /* Move to next entry. */
5600                         dep = (struct dirent *)((char *)dep + dep->d_reclen);
5601
5602                         /* Copy entry64 to user's buffer. */
5603                         error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio);
5604                 }
5605
5606                 /* Update the real offset using the offset we got from VNOP_READDIR. */
5607                 if (error == 0) {
5608                         uio->uio_offset = auio->uio_offset;
5609                 }
5610                 uio_free(auio);
5611                 FREE(bufptr, M_TEMP);
5612                 return (error);
5613         }
5614 }
5615
5616 /*
5617  * Read a block of directory entries in a file system independent format.
5618  */
5619 static int
5620 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
5621                      off_t *offset, int flags)
5622 {
5623         vnode_t vp;
5624         struct vfs_context context = *vfs_context_current();    /* local copy */
5625         struct fileproc *fp;
5626         uio_t auio;
5627         int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5628         off_t loff;
5629         int error, eofflag, numdirent;
5630         char uio_buf[ UIO_SIZEOF(1) ];
5631
5632         error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
5633         if (error) {
5634                 return (error);
5635         }
5636         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
5637                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5638                 error = EBADF;
5639                 goto out;
5640         }
5641
5642 #if CONFIG_MACF
5643         error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
5644         if (error)
5645                 goto out;
5646 #endif
5647         if ( (error = vnode_getwithref(vp)) ) {
5648                 goto out;
5649         }
5650         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5651
5652 unionread:
5653         if (vp->v_type != VDIR) {
5654                 (void)vnode_put(vp);
5655                 error = EINVAL;
5656                 goto out;
5657         }
5658
5659 #if CONFIG_MACF
5660         error = mac_vnode_check_readdir(&context, vp);
5661         if (error != 0) {
5662                 (void)vnode_put(vp);
5663                 goto out;
5664         }
5665 #endif /* MAC */
5666
5667         loff = fp->f_fglob->fg_offset;
5668         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
5669         uio_addiov(auio, bufp, bufsize);
5670
5671         if (flags & VNODE_READDIR_EXTENDED) {
5672                 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
5673                 fp->f_fglob->fg_offset = uio_offset(auio);
5674         } else {
5675                 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
5676                 fp->f_fglob->fg_offset = uio_offset(auio);
5677         }
5678         if (error) {
5679                 (void)vnode_put(vp);
5680                 goto out;
5681         }
5682
5683         if ((user_ssize_t)bufsize == uio_resid(auio)){
5684                 if (union_dircheckp) {
5685                         error = union_dircheckp(&vp, fp, &context);
5686                         if (error == -1)
5687                                 goto unionread;
5688                         if (error)
5689                                 goto out;
5690                 }
5691
5692                 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) {
5693                         struct vnode *tvp = vp;
5694                         vp = vp->v_mount->mnt_vnodecovered;
5695                         vnode_getwithref(vp);
5696                         vnode_ref(vp);
5697                         fp->f_fglob->fg_data = (caddr_t) vp;
5698                         fp->f_fglob->fg_offset = 0;
5699                         vnode_rele(tvp);
5700                         vnode_put(tvp);
5701                         goto unionread;
5702                 }
5703         }
5704
5705         vnode_put(vp);
5706         if (offset) {
5707                 *offset = loff;
5708         }
5709         // LP64todo - fix this
5710         *bytesread = bufsize - uio_resid(auio);
5711 out:
5712         file_drop(fd);
5713         return (error);
5714 }
5715
5716
5717 int
5718 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, register_t *retval)
5719 {
5720         off_t offset;
5721         long loff;
5722         ssize_t bytesread;
5723         int error;
5724
5725         AUDIT_ARG(fd, uap->fd);
5726         error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
5727
5728         if (error == 0) {
5729                 loff = (long)offset;
5730                 error = copyout((caddr_t)&loff, uap->basep, sizeof(long));
5731                 *retval = bytesread;
5732         }
5733         return (error);
5734 }
5735
5736 int
5737 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
5738 {
5739         off_t offset;
5740         ssize_t bytesread;
5741         int error;
5742
5743         AUDIT_ARG(fd, uap->fd);
5744         error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
5745
5746         if (error == 0) {
5747                 *retval = bytesread;
5748                 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
5749         }
5750         return (error);
5751 }
5752
5753
5754 /*
5755  * Set the mode mask for creation of filesystem nodes.
5756  */
5757 #warning XXX implement xsecurity
5758
5759 #define UMASK_NOXSECURITY        (void *)1      /* leave existing xsecurity alone */
5760 static int
5761 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, register_t *retval)
5762 {
5763         struct filedesc *fdp;
5764
5765         AUDIT_ARG(mask, newmask);
5766         proc_fdlock(p);
5767         fdp = p->p_fd;
5768         *retval = fdp->fd_cmask;
5769         fdp->fd_cmask = newmask & ALLPERMS;
5770         proc_fdunlock(p);
5771         return (0);
5772 }
5773
5774
5775 int
5776 umask_extended(proc_t p, struct umask_extended_args *uap, register_t *retval)
5777 {
5778         int ciferror;
5779         kauth_filesec_t xsecdst;
5780
5781         xsecdst = KAUTH_FILESEC_NONE;
5782         if (uap->xsecurity != USER_ADDR_NULL) {
5783                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5784                         return ciferror;
5785         } else {
5786                 xsecdst = KAUTH_FILESEC_NONE;
5787         }
5788
5789         ciferror = umask1(p, uap->newmask, xsecdst, retval);
5790
5791         if (xsecdst != KAUTH_FILESEC_NONE)
5792                 kauth_filesec_free(xsecdst);
5793         return ciferror;
5794 }
5795
5796 int
5797 umask(proc_t p, struct umask_args *uap, register_t *retval)
5798 {
5799         return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
5800 }
5801
5802 /*
5803  * Void all references to file by ripping underlying filesystem
5804  * away from vnode.
5805  */
5806 /* ARGSUSED */
5807 int
5808 revoke(proc_t p, struct revoke_args *uap, __unused register_t *retval)
5809 {
5810         vnode_t vp;
5811         struct vnode_attr va;
5812         vfs_context_t ctx = vfs_context_current();
5813         int error;
5814         struct nameidata nd;
5815
5816         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
5817                 UIO_USERSPACE, uap->path, ctx);
5818         error = namei(&nd);
5819         if (error)
5820                 return (error);
5821         vp = nd.ni_vp;
5822
5823         nameidone(&nd);
5824
5825 #if CONFIG_MACF
5826         error = mac_vnode_check_revoke(ctx, vp);
5827         if (error)
5828                 goto out;
5829 #endif
5830
5831         VATTR_INIT(&va);
5832         VATTR_WANTED(&va, va_uid);
5833         if ((error = vnode_getattr(vp, &va, ctx)))
5834                 goto out;
5835         if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
5836             (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
5837                 goto out;
5838         if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
5839                 VNOP_REVOKE(vp, REVOKEALL, ctx);
5840 out:
5841         vnode_put(vp);
5842         return (error);
5843 }
5844
5845
/*
 *  HFS/HFS PLUS SPECIFIC SYSTEM CALLS
 *  The following system calls are designed to support features
 *  which are specific to the HFS & HFS Plus volume formats
 */
5851
5852 #ifdef __APPLE_API_OBSOLETE
5853
5854 /************************************************/
5855 /* *** Following calls will be deleted soon *** */
5856 /************************************************/
5857
5858 /*
5859  * Make a complex file.  A complex file is one with multiple forks (data streams)
5860  */
5861 /* ARGSUSED */
5862 int
5863 mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused register_t *retval)
5864 {
5865         return (ENOTSUP);
5866 }
5867
5868 /*
5869  * Extended stat call which returns volumeid and vnodeid as well as other info
5870  */
5871 /* ARGSUSED */
5872 int
5873 statv(__unused proc_t p,
5874           __unused struct statv_args *uap,
5875           __unused register_t *retval)
5876 {
5877         return (ENOTSUP);       /*  We'll just return an error for now */
5878
5879 } /* end of statv system call */
5880
5881 /*
5882 * Extended lstat call which returns volumeid and vnodeid as well as other info
5883 */
5884 /* ARGSUSED */
5885 int
5886 lstatv(__unused proc_t p,
5887            __unused struct lstatv_args *uap,
5888            __unused register_t *retval)
5889 {
5890        return (ENOTSUP);        /*  We'll just return an error for now */
5891 } /* end of lstatv system call */
5892
5893 /*
5894 * Extended fstat call which returns volumeid and vnodeid as well as other info
5895 */
5896 /* ARGSUSED */
5897 int
5898 fstatv(__unused proc_t p,
5899            __unused struct fstatv_args *uap,
5900            __unused register_t *retval)
5901 {
5902        return (ENOTSUP);        /*  We'll just return an error for now */
5903 } /* end of fstatv system call */
5904
5905
5906 /************************************************/
5907 /* *** Preceding calls will be deleted soon *** */
5908 /************************************************/
5909
5910 #endif /* __APPLE_API_OBSOLETE */
5911
5912 /*
5913 * Obtain attribute information on objects in a directory while enumerating
5914 * the directory.  This call does not yet support union mounted directories.
5915 * TO DO
5916 *  1.union mounted directories.
5917 */
5918
5919 /* ARGSUSED */
5920 int
5921 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, register_t *retval)
5922 {
5923         vnode_t vp;
5924         struct fileproc *fp;
5925         uio_t auio = NULL;
5926         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5927         uint32_t count;
5928         uint32_t newstate;
5929         int error, eofflag;
5930         uint32_t loff;
5931         struct attrlist attributelist;
5932         vfs_context_t ctx = vfs_context_current();
5933         int fd = uap->fd;
5934         char uio_buf[ UIO_SIZEOF(1) ];
5935         kauth_action_t action;
5936
5937         AUDIT_ARG(fd, fd);
5938
5939         /* Get the attributes into kernel space */
5940         if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
5941                 return(error);
5942         }
5943         if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
5944                 return(error);
5945         }
5946         if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
5947                 return (error);
5948         }
5949         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
5950                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5951                 error = EBADF;
5952                 goto out;
5953         }
5954
5955
5956 #if CONFIG_MACF
5957         error = mac_file_check_change_offset(vfs_context_ucred(ctx),
5958             fp->f_fglob);
5959         if (error)
5960                 goto out;
5961 #endif
5962
5963
5964         if ( (error = vnode_getwithref(vp)) )
5965                 goto out;
5966
5967         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5968
5969         if (vp->v_type != VDIR) {
5970                 (void)vnode_put(vp);
5971                 error = EINVAL;
5972                 goto out;
5973         }
5974
5975 #if CONFIG_MACF
5976         error = mac_vnode_check_readdir(ctx, vp);
5977         if (error != 0) {
5978                 (void)vnode_put(vp);
5979                 goto out;
5980         }
5981 #endif /* MAC */
5982
5983         /* set up the uio structure which will contain the users return buffer */
5984         loff = fp->f_fglob->fg_offset;
5985         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ,
5986             &uio_buf[0], sizeof(uio_buf));
5987         uio_addiov(auio, uap->buffer, uap->buffersize);
5988
5989         /*
5990          * If the only item requested is file names, we can let that past with
5991          * just LIST_DIRECTORY.  If they want any other attributes, that means
5992          * they need SEARCH as well.
5993          */
5994         action = KAUTH_VNODE_LIST_DIRECTORY;
5995         if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
5996             attributelist.fileattr || attributelist.dirattr)
5997                 action |= KAUTH_VNODE_SEARCH;
5998
5999         if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
6000                 u_long ulcount = count;
6001
6002                 error = VNOP_READDIRATTR(vp, &attributelist, auio,
6003                                          count,
6004                                          uap->options, (unsigned long *)&newstate, &eofflag,
6005                                          &ulcount, ctx);
6006                 if (!error)
6007                         count = ulcount;
6008         }
6009         (void)vnode_put(vp);
6010
6011         if (error)
6012                 goto out;
6013         fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
6014
6015         if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
6016                 goto out;
6017         if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
6018                 goto out;
6019         if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
6020                 goto out;
6021
6022         *retval = eofflag;  /* similar to getdirentries */
6023         error = 0;
6024 out:
6025         file_drop(fd);
6026         return (error); /* return error earlier, an retval of 0 or 1 now */
6027
6028 } /* end of getdirentryattr system call */
6029
6030 /*
6031 * Exchange data between two files
6032 */
6033
6034 /* ARGSUSED */
6035 int
6036 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused register_t *retval)
6037 {
6038
6039         struct nameidata fnd, snd;
6040         vfs_context_t ctx = vfs_context_current();
6041         vnode_t fvp;
6042         vnode_t svp;
6043         int error;
6044         u_long nameiflags;
6045         char *fpath = NULL;
6046         char *spath = NULL;
6047         int   flen, slen;
6048         fse_info f_finfo, s_finfo;
6049
6050         nameiflags = 0;
6051         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6052
6053     NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1,
6054                 UIO_USERSPACE, uap->path1, ctx);
6055
6056     error = namei(&fnd);
6057     if (error)
6058         goto out2;
6059
6060         nameidone(&fnd);
6061         fvp = fnd.ni_vp;
6062
6063     NDINIT(&snd, LOOKUP | CN_NBMOUNTLOOK, nameiflags | AUDITVNPATH2,
6064                 UIO_USERSPACE, uap->path2, ctx);
6065
6066     error = namei(&snd);
6067     if (error) {
6068                 vnode_put(fvp);
6069                 goto out2;
6070     }
6071         nameidone(&snd);
6072         svp = snd.ni_vp;
6073
6074         /*
6075          * if the files are the same, return an inval error
6076          */
6077         if (svp == fvp) {
6078                 error = EINVAL;
6079                 goto out;
6080         }
6081
6082         /*
6083          * if the files are on different volumes, return an error
6084          */
6085         if (svp->v_mount != fvp->v_mount) {
6086                 error = EXDEV;
6087                 goto out;
6088         }
6089
6090 #if CONFIG_MACF
6091         error = mac_vnode_check_exchangedata(ctx,
6092             fvp, svp);
6093         if (error)
6094                 goto out;
6095 #endif
6096         if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
6097             ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
6098                 goto out;
6099
6100         if (
6101 #if CONFIG_FSE
6102         need_fsevent(FSE_EXCHANGE, fvp) ||
6103 #endif
6104         kauth_authorize_fileop_has_listeners()) {
6105                 GET_PATH(fpath);
6106                 GET_PATH(spath);
6107                 if (fpath == NULL || spath == NULL) {
6108                         error = ENOMEM;
6109                         goto out;
6110                 }
6111                 flen = MAXPATHLEN;
6112                 slen = MAXPATHLEN;
6113                 if (vn_getpath(fvp, fpath, &flen) != 0 || fpath[0] == '\0') {
6114                         printf("exchange: vn_getpath(fvp=%p) failed <<%s>>\n",
6115                                fvp, fpath);
6116                 }
6117                 if (vn_getpath(svp, spath, &slen) != 0 || spath[0] == '\0') {
6118                         printf("exchange: vn_getpath(svp=%p) failed <<%s>>\n",
6119                                svp, spath);
6120                 }
6121 #if CONFIG_FSE
6122                 get_fse_info(fvp, &f_finfo, ctx);
6123                 get_fse_info(svp, &s_finfo, ctx);
6124 #endif
6125         }
6126         /* Ok, make the call */
6127         error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
6128
6129         if (error == 0) {
6130             const char *tmpname;
6131
6132             if (fpath != NULL && spath != NULL) {
6133                     /* call out to allow 3rd party notification of exchangedata.
6134                      * Ignore result of kauth_authorize_fileop call.
6135                      */
6136                     kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
6137                                            (uintptr_t)fpath, (uintptr_t)spath);
6138             }
6139             name_cache_lock();
6140
6141             tmpname     = fvp->v_name;
6142             fvp->v_name = svp->v_name;
6143             svp->v_name = tmpname;
6144
6145             if (fvp->v_parent != svp->v_parent) {
6146                 vnode_t tmp;
6147
6148                 tmp           = fvp->v_parent;
6149                 fvp->v_parent = svp->v_parent;
6150                 svp->v_parent = tmp;
6151             }
6152             name_cache_unlock();
6153
6154 #if CONFIG_FSE
6155             if (fpath != NULL && spath != NULL) {
6156                     add_fsevent(FSE_EXCHANGE, ctx,
6157                                 FSE_ARG_STRING, flen, fpath,
6158                                 FSE_ARG_FINFO, &f_finfo,
6159                                 FSE_ARG_STRING, slen, spath,
6160                                 FSE_ARG_FINFO, &s_finfo,
6161                                 FSE_ARG_DONE);
6162             }
6163 #endif
6164         }
6165
6166 out:
6167         if (fpath != NULL)
6168                 RELEASE_PATH(fpath);
6169         if (spath != NULL)
6170                 RELEASE_PATH(spath);
6171         vnode_put(svp);
6172         vnode_put(fvp);
6173 out2:
6174         return (error);
6175 }
6176
6177
6178 /* ARGSUSED */
6179
6180 int
6181 searchfs(proc_t p, struct searchfs_args *uap, __unused register_t *retval)
6182 {
6183         vnode_t vp;
6184         int error=0;
6185         int fserror = 0;
6186         struct nameidata nd;
6187         struct user_fssearchblock searchblock;
6188         struct searchstate *state;
6189         struct attrlist *returnattrs;
6190         void *searchparams1,*searchparams2;
6191         uio_t auio = NULL;
6192         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6193         u_long nummatches;
6194         int mallocsize;
6195         u_long nameiflags;
6196         vfs_context_t ctx = vfs_context_current();
6197         char uio_buf[ UIO_SIZEOF(1) ];
6198
6199         /* Start by copying in fsearchblock paramater list */
6200     if (IS_64BIT_PROCESS(p)) {
6201        error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
6202     }
6203     else {
6204         struct fssearchblock tmp_searchblock;
6205         error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
6206         // munge into 64-bit version
6207         searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
6208         searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
6209         searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
6210         searchblock.maxmatches = tmp_searchblock.maxmatches;
6211         searchblock.timelimit.tv_sec = tmp_searchblock.timelimit.tv_sec;
6212         searchblock.timelimit.tv_usec = tmp_searchblock.timelimit.tv_usec;
6213         searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
6214         searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
6215         searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
6216         searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
6217         searchblock.searchattrs = tmp_searchblock.searchattrs;
6218     }
6219         if (error)
6220                 return(error);
6221
6222         /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
6223          */
6224         if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
6225                 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
6226                 return(EINVAL);
6227
6228         /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
6229         /* It all has to do into local memory and it's not that big so we might as well  put it all together. */
6230         /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
6231         /* block.                                                                                             */
6232
6233         mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
6234                       sizeof(struct attrlist) + sizeof(struct searchstate);
6235
6236         MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
6237
6238         /* Now set up the various pointers to the correct place in our newly allocated memory */
6239
6240         searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
6241         returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
6242         state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
6243
6244         /* Now copy in the stuff given our local variables. */
6245
6246         if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
6247                 goto freeandexit;
6248
6249         if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
6250                 goto freeandexit;
6251
6252         if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
6253                 goto freeandexit;
6254
6255         if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
6256                 goto freeandexit;
6257
6258         /* set up the uio structure which will contain the users return buffer */
6259
6260         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
6261                                                                   &uio_buf[0], sizeof(uio_buf));
6262     uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
6263
6264         nameiflags = 0;
6265         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6266         NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1,
6267                 UIO_USERSPACE, uap->path, ctx);
6268
6269         error = namei(&nd);
6270         if (error)
6271                 goto freeandexit;
6272
6273         nameidone(&nd);
6274         vp = nd.ni_vp;
6275
6276
6277         /*
6278          * If searchblock.maxmatches == 0, then skip the search. This has happened
6279          * before and sometimes the underlyning code doesnt deal with it well.
6280          */
6281          if (searchblock.maxmatches == 0) {
6282                 nummatches = 0;
6283                 goto saveandexit;
6284          }
6285
6286         /*
6287            Allright, we have everything we need, so lets make that call.
6288
6289            We keep special track of the return value from the file system:
6290            EAGAIN is an acceptable error condition that shouldn't keep us
6291            from copying out any results...
6292          */
6293
6294         fserror = VNOP_SEARCHFS(vp,
6295                                                         searchparams1,
6296                                                         searchparams2,
6297                                                         &searchblock.searchattrs,
6298                                                         searchblock.maxmatches,
6299                                                         &searchblock.timelimit,
6300                                                         returnattrs,
6301                                                         &nummatches,
6302                                                         uap->scriptcode,
6303                                                         uap->options,
6304                                                         auio,
6305                                                         state,
6306                                                         ctx);
6307
6308 saveandexit:
6309
6310         vnode_put(vp);
6311
6312         /* Now copy out the stuff that needs copying out. That means the number of matches, the
6313            search state.  Everything was already put into he return buffer by the vop call. */
6314
6315         if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
6316                 goto freeandexit;
6317
6318     if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6319                 goto freeandexit;
6320
6321         error = fserror;
6322
6323 freeandexit:
6324
6325         FREE(searchparams1,M_TEMP);
6326
6327         return(error);
6328
6329
6330 } /* end of searchfs system call */
6331
6332
6333 /*
6334  * Make a filesystem-specific control call:
6335  */
6336 /* ARGSUSED */
6337 int
6338 fsctl (proc_t p, struct fsctl_args *uap, __unused register_t *retval)
6339 {
6340         int error;
6341         boolean_t is64bit;
6342         struct nameidata nd;
6343         u_long nameiflags;
6344         u_long cmd = uap->cmd;
6345         u_int size;
6346 #define STK_PARAMS 128
6347         char stkbuf[STK_PARAMS];
6348         caddr_t data, memp;
6349         vfs_context_t ctx = vfs_context_current();
6350
6351         size = IOCPARM_LEN(cmd);
6352         if (size > IOCPARM_MAX) return (EINVAL);
6353
6354     is64bit = proc_is64bit(p);
6355
6356         memp = NULL;
6357         if (size > sizeof (stkbuf)) {
6358                 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
6359                 data = memp;
6360         } else {
6361                 data = &stkbuf[0];
6362         };
6363
6364         if (cmd & IOC_IN) {
6365                 if (size) {
6366                         error = copyin(uap->data, data, size);
6367                         if (error) goto FSCtl_Exit;
6368                 } else {
6369                     if (is64bit) {
6370                         *(user_addr_t *)data = uap->data;
6371                     }
6372                     else {
6373                         *(uint32_t *)data = (uint32_t)uap->data;
6374                     }
6375                 };
6376         } else if ((cmd & IOC_OUT) && size) {
6377                 /*
6378                  * Zero the buffer so the user always
6379                  * gets back something deterministic.
6380                  */
6381                 bzero(data, size);
6382         } else if (cmd & IOC_VOID) {
6383         if (is64bit) {
6384             *(user_addr_t *)data = uap->data;
6385         }
6386         else {
6387             *(uint32_t *)data = (uint32_t)uap->data;
6388         }
6389         }
6390
6391         /* Get the vnode for the file we are getting info on:  */
6392         nameiflags = 0;
6393         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6394         NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, uap->path, ctx);
6395         if ((error = namei(&nd))) goto FSCtl_Exit;
6396
6397 #if CONFIG_MACF
6398         error = mac_mount_check_fsctl(ctx, vnode_mount(nd.ni_vp), cmd);
6399         if (error) {
6400                 vnode_put(nd.ni_vp);
6401                 nameidone(&nd);
6402                 goto FSCtl_Exit;
6403         }
6404 #endif
6405
6406         /* Invoke the filesystem-specific code */
6407         error = VNOP_IOCTL(nd.ni_vp, IOCBASECMD(cmd), data, uap->options, ctx);
6408
6409         vnode_put(nd.ni_vp);
6410         nameidone(&nd);
6411
6412         /*
6413          * Copy any data to user, size was
6414          * already set and checked above.
6415          */
6416         if (error == 0 && (cmd & IOC_OUT) && size)
6417                 error = copyout(data, uap->data, size);
6418
6419 FSCtl_Exit:
6420         if (memp) kfree(memp, size);
6421
6422         return error;
6423 }
6424 /* end of fsctl system call */
6425
6426 /*
6427  * An in-kernel sync for power management to call.
6428  */
6429 __private_extern__ int
6430 sync_internal(void)
6431 {
6432         int error;
6433
6434         struct sync_args data;
6435
6436         int retval[2];
6437
6438
6439         error = sync(current_proc(), &data, &retval[0]);
6440
6441
6442         return (error);
6443 } /* end of sync_internal call */
6444
6445
6446 /*
6447  *  Retrieve the data of an extended attribute.
6448  */
6449 int
6450 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
6451 {
6452         vnode_t vp;
6453         struct nameidata nd;
6454         char attrname[XATTR_MAXNAMELEN+1];
6455         vfs_context_t ctx = vfs_context_current();
6456         uio_t auio = NULL;
6457         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6458         size_t attrsize = 0;
6459         size_t namelen;
6460         u_long nameiflags;
6461         int error;
6462         char uio_buf[ UIO_SIZEOF(1) ];
6463
6464         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6465                 return (EINVAL);
6466
6467         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6468         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6469         if ((error = namei(&nd))) {
6470                 return (error);
6471         }
6472         vp = nd.ni_vp;
6473         nameidone(&nd);
6474
6475         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6476                 goto out;
6477         }
6478         if (xattr_protected(attrname)) {
6479                 error = EPERM;
6480                 goto out;
6481         }
6482         if (uap->value && uap->size > 0) {
6483                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
6484                                             &uio_buf[0], sizeof(uio_buf));
6485                 uio_addiov(auio, uap->value, uap->size);
6486         }
6487
6488         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
6489 out:
6490         vnode_put(vp);
6491
6492         if (auio) {
6493                 *retval = uap->size - uio_resid(auio);
6494         } else {
6495                 *retval = (user_ssize_t)attrsize;
6496         }
6497
6498         return (error);
6499 }
6500
6501 /*
6502  * Retrieve the data of an extended attribute.
6503  */
6504 int
6505 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
6506 {
6507         vnode_t vp;
6508         char attrname[XATTR_MAXNAMELEN+1];
6509         uio_t auio = NULL;
6510         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6511         size_t attrsize = 0;
6512         size_t namelen;
6513         int error;
6514         char uio_buf[ UIO_SIZEOF(1) ];
6515
6516         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6517                 return (EINVAL);
6518
6519         if ( (error = file_vnode(uap->fd, &vp)) ) {
6520                 return (error);
6521         }
6522         if ( (error = vnode_getwithref(vp)) ) {
6523                 file_drop(uap->fd);
6524                 return(error);
6525         }
6526         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6527                 goto out;
6528         }
6529         if (xattr_protected(attrname)) {
6530                 error = EPERM;
6531                 goto out;
6532         }
6533         if (uap->value && uap->size > 0) {
6534                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
6535                                             &uio_buf[0], sizeof(uio_buf));
6536                 uio_addiov(auio, uap->value, uap->size);
6537         }
6538
6539         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
6540 out:
6541         (void)vnode_put(vp);
6542         file_drop(uap->fd);
6543
6544         if (auio) {
6545                 *retval = uap->size - uio_resid(auio);
6546         } else {
6547                 *retval = (user_ssize_t)attrsize;
6548         }
6549         return (error);
6550 }
6551
6552 /*
6553  * Set the data of an extended attribute.
6554  */
6555 int
6556 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
6557 {
6558         vnode_t vp;
6559         struct nameidata nd;
6560         char attrname[XATTR_MAXNAMELEN+1];
6561         vfs_context_t ctx = vfs_context_current();
6562         uio_t auio = NULL;
6563         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6564         size_t namelen;
6565         u_long nameiflags;
6566         int error;
6567         char uio_buf[ UIO_SIZEOF(1) ];
6568
6569         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6570                 return (EINVAL);
6571
6572         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6573                 return (error);
6574         }
6575         if (xattr_protected(attrname))
6576                 return(EPERM);
6577         if (uap->size != 0 && uap->value == 0) {
6578                 return (EINVAL);
6579         }
6580
6581         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6582         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6583         if ((error = namei(&nd))) {
6584                 return (error);
6585         }
6586         vp = nd.ni_vp;
6587         nameidone(&nd);
6588
6589         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
6590                                     &uio_buf[0], sizeof(uio_buf));
6591         uio_addiov(auio, uap->value, uap->size);
6592
6593         error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
6594 #if CONFIG_FSE
6595         if (error == 0) {
6596                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
6597                     FSE_ARG_VNODE, vp,
6598                     FSE_ARG_DONE);
6599         }
6600 #endif
6601         vnode_put(vp);
6602         *retval = 0;
6603         return (error);
6604 }
6605
6606 /*
6607  * Set the data of an extended attribute.
6608  */
6609 int
6610 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
6611 {
6612         vnode_t vp;
6613         char attrname[XATTR_MAXNAMELEN+1];
6614         uio_t auio = NULL;
6615         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6616         size_t namelen;
6617         int error;
6618         char uio_buf[ UIO_SIZEOF(1) ];
6619         vfs_context_t ctx = vfs_context_current();
6620
6621         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6622                 return (EINVAL);
6623
6624         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6625                 return (error);
6626         }
6627         if (xattr_protected(attrname))
6628                 return(EPERM);
6629         if (uap->size != 0 && uap->value == 0) {
6630                 return (EINVAL);
6631         }
6632         if ( (error = file_vnode(uap->fd, &vp)) ) {
6633                 return (error);
6634         }
6635         if ( (error = vnode_getwithref(vp)) ) {
6636                 file_drop(uap->fd);
6637                 return(error);
6638         }
6639         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
6640                                     &uio_buf[0], sizeof(uio_buf));
6641         uio_addiov(auio, uap->value, uap->size);
6642
6643         error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
6644 #if CONFIG_FSE
6645         if (error == 0) {
6646                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
6647                     FSE_ARG_VNODE, vp,
6648                     FSE_ARG_DONE);
6649         }
6650 #endif
6651         vnode_put(vp);
6652         file_drop(uap->fd);
6653         *retval = 0;
6654         return (error);
6655 }
6656
6657 /*
6658  * Remove an extended attribute.
6659  */
6660 #warning "code duplication"
6661 int
6662 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
6663 {
6664         vnode_t vp;
6665         struct nameidata nd;
6666         char attrname[XATTR_MAXNAMELEN+1];
6667         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6668         vfs_context_t ctx = vfs_context_current();
6669         size_t namelen;
6670         u_long nameiflags;
6671         int error;
6672
6673         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6674                 return (EINVAL);
6675
6676         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
6677         if (error != 0) {
6678                 return (error);
6679         }
6680         if (xattr_protected(attrname))
6681                 return(EPERM);
6682         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6683         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6684         if ((error = namei(&nd))) {
6685                 return (error);
6686         }
6687         vp = nd.ni_vp;
6688         nameidone(&nd);
6689
6690         error = vn_removexattr(vp, attrname, uap->options, ctx);
6691 #if CONFIG_FSE
6692         if (error == 0) {
6693                 add_fsevent(FSE_XATTR_REMOVED, ctx,
6694                     FSE_ARG_VNODE, vp,
6695                     FSE_ARG_DONE);
6696         }
6697 #endif
6698         vnode_put(vp);
6699         *retval = 0;
6700         return (error);
6701 }
6702
6703 /*
6704  * Remove an extended attribute.
6705  */
6706 #warning "code duplication"
6707 int
6708 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
6709 {
6710         vnode_t vp;
6711         char attrname[XATTR_MAXNAMELEN+1];
6712         size_t namelen;
6713         int error;
6714         vfs_context_t ctx = vfs_context_current();
6715
6716         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6717                 return (EINVAL);
6718
6719         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
6720         if (error != 0) {
6721                 return (error);
6722         }
6723         if (xattr_protected(attrname))
6724                 return(EPERM);
6725         if ( (error = file_vnode(uap->fd, &vp)) ) {
6726                 return (error);
6727         }
6728         if ( (error = vnode_getwithref(vp)) ) {
6729                 file_drop(uap->fd);
6730                 return(error);
6731         }
6732
6733         error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
6734 #if CONFIG_FSE
6735         if (error == 0) {
6736                 add_fsevent(FSE_XATTR_REMOVED, ctx,
6737                     FSE_ARG_VNODE, vp,
6738                     FSE_ARG_DONE);
6739         }
6740 #endif
6741         vnode_put(vp);
6742         file_drop(uap->fd);
6743         *retval = 0;
6744         return (error);
6745 }
6746
6747 /*
6748  * Retrieve the list of extended attribute names.
6749  */
6750 #warning "code duplication"
6751 int
6752 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
6753 {
6754         vnode_t vp;
6755         struct nameidata nd;
6756         vfs_context_t ctx = vfs_context_current();
6757         uio_t auio = NULL;
6758         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6759         size_t attrsize = 0;
6760         u_long nameiflags;
6761         int error;
6762         char uio_buf[ UIO_SIZEOF(1) ];
6763
6764         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
6765                 return (EINVAL);
6766
6767         nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
6768         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
6769         if ((error = namei(&nd))) {
6770                 return (error);
6771         }
6772         vp = nd.ni_vp;
6773         nameidone(&nd);
6774         if (uap->namebuf != 0 && uap->bufsize > 0) {
6775                 // LP64todo - fix this!
6776                 auio = uio_createwithbuffer(1, 0, spacetype,
6777                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
6778                 uio_addiov(auio, uap->namebuf, uap->bufsize);
6779         }
6780
6781         error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
6782
6783         vnode_put(vp);
6784         if (auio) {
6785                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
6786         } else {
6787                 *retval = (user_ssize_t)attrsize;
6788         }
6789         return (error);
6790 }
6791
6792 /*
6793  * Retrieve the list of extended attribute names.
6794  */
6795 #warning "code duplication"
6796 int
6797 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
6798 {
6799         vnode_t vp;
6800         uio_t auio = NULL;
6801         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6802         size_t attrsize = 0;
6803         int error;
6804         char uio_buf[ UIO_SIZEOF(1) ];
6805
6806         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
6807                 return (EINVAL);
6808
6809         if ( (error = file_vnode(uap->fd, &vp)) ) {
6810                 return (error);
6811         }
6812         if ( (error = vnode_getwithref(vp)) ) {
6813                 file_drop(uap->fd);
6814                 return(error);
6815         }
6816         if (uap->namebuf != 0 && uap->bufsize > 0) {
6817                 // LP64todo - fix this!
6818                 auio = uio_createwithbuffer(1, 0, spacetype,
6819                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
6820                 uio_addiov(auio, uap->namebuf, uap->bufsize);
6821         }
6822
6823         error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
6824
6825         vnode_put(vp);
6826         file_drop(uap->fd);
6827         if (auio) {
6828                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
6829         } else {
6830                 *retval = (user_ssize_t)attrsize;
6831         }
6832         return (error);
6833 }
6834
6835 /*
6836  * Common routine to handle various flavors of statfs data heading out
6837  *      to user space.
6838  *
6839  * Returns:     0                       Success
6840  *              EFAULT
6841  */
6842 static int
6843 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
6844     user_addr_t bufp, int *sizep, boolean_t is_64_bit,
6845     boolean_t partial_copy)
6846 {
6847         int             error;
6848         int             my_size, copy_size;
6849
6850         if (is_64_bit) {
6851                 struct user_statfs sfs;
6852                 my_size = copy_size = sizeof(sfs);
6853                 bzero(&sfs, my_size);
6854                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
6855                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
6856                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
6857                 sfs.f_bsize = (user_long_t)sfsp->f_bsize;
6858                 sfs.f_iosize = (user_long_t)sfsp->f_iosize;
6859                 sfs.f_blocks = (user_long_t)sfsp->f_blocks;
6860                 sfs.f_bfree = (user_long_t)sfsp->f_bfree;
6861                 sfs.f_bavail = (user_long_t)sfsp->f_bavail;
6862                 sfs.f_files = (user_long_t)sfsp->f_files;
6863                 sfs.f_ffree = (user_long_t)sfsp->f_ffree;
6864                 sfs.f_fsid = sfsp->f_fsid;
6865                 sfs.f_owner = sfsp->f_owner;
6866                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
6867                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
6868                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
6869
6870                 if (partial_copy) {
6871                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
6872                 }
6873                 error = copyout((caddr_t)&sfs, bufp, copy_size);
6874         }
6875         else {
6876                 struct statfs sfs;
6877                 my_size = copy_size = sizeof(sfs);
6878                 bzero(&sfs, my_size);
6879
6880                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
6881                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
6882                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
6883
6884                 /*
6885                  * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
6886                  * have to fudge the numbers here in that case.   We inflate the blocksize in order
6887                  * to reflect the filesystem size as best we can.
6888                  */
6889                 if ((sfsp->f_blocks > LONG_MAX)
6890                         /* Hack for 4061702 . I think the real fix is for Carbon to
6891                          * look for some volume capability and not depend on hidden
6892                          * semantics agreed between a FS and carbon.
6893                          * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
6894                          * for Carbon to set bNoVolumeSizes volume attribute.
6895                          * Without this the webdavfs files cannot be copied onto
6896                          * disk as they look huge. This change should not affect
6897                          * XSAN as they should not setting these to -1..
6898                          */
6899                          && (sfsp->f_blocks != 0xffffffffffffffffULL)
6900                          && (sfsp->f_bfree != 0xffffffffffffffffULL)
6901                          && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
6902                         int             shift;
6903
6904                         /*
6905                          * Work out how far we have to shift the block count down to make it fit.
6906                          * Note that it's possible to have to shift so far that the resulting
6907                          * blocksize would be unreportably large.  At that point, we will clip
6908                          * any values that don't fit.
6909                          *
6910                          * For safety's sake, we also ensure that f_iosize is never reported as
6911                          * being smaller than f_bsize.
6912                          */
6913                         for (shift = 0; shift < 32; shift++) {
6914                                 if ((sfsp->f_blocks >> shift) <= LONG_MAX)
6915                                         break;
6916                                 if ((sfsp->f_bsize << (shift + 1)) > LONG_MAX)
6917                                         break;
6918                         }
6919 #define __SHIFT_OR_CLIP(x, s)   ((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s)))
6920                         sfs.f_blocks = (long)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
6921                         sfs.f_bfree = (long)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
6922                         sfs.f_bavail = (long)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
6923 #undef __SHIFT_OR_CLIP
6924                         sfs.f_bsize = (long)(sfsp->f_bsize << shift);
6925                         sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
6926                 } else {
6927                         /* filesystem is small enough to be reported honestly */
6928                         sfs.f_bsize = (long)sfsp->f_bsize;
6929                         sfs.f_iosize = (long)sfsp->f_iosize;
6930                         sfs.f_blocks = (long)sfsp->f_blocks;
6931                         sfs.f_bfree = (long)sfsp->f_bfree;
6932                         sfs.f_bavail = (long)sfsp->f_bavail;
6933                 }
6934                 sfs.f_files = (long)sfsp->f_files;
6935                 sfs.f_ffree = (long)sfsp->f_ffree;
6936                 sfs.f_fsid = sfsp->f_fsid;
6937                 sfs.f_owner = sfsp->f_owner;
6938                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
6939                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
6940                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
6941
6942                 if (partial_copy) {
6943                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
6944                 }
6945                 error = copyout((caddr_t)&sfs, bufp, copy_size);
6946         }
6947
6948         if (sizep != NULL) {
6949                 *sizep = my_size;
6950         }
6951         return(error);
6952 }
6953
6954 /*
6955  * copy stat structure into user_stat structure.
6956  */
6957 void munge_stat(struct stat *sbp, struct user_stat *usbp)
6958 {
6959         bzero(usbp, sizeof(struct user_stat));
6960
6961         usbp->st_dev = sbp->st_dev;
6962         usbp->st_ino = sbp->st_ino;
6963         usbp->st_mode = sbp->st_mode;
6964         usbp->st_nlink = sbp->st_nlink;
6965         usbp->st_uid = sbp->st_uid;
6966         usbp->st_gid = sbp->st_gid;
6967         usbp->st_rdev = sbp->st_rdev;
6968 #ifndef _POSIX_C_SOURCE
6969         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
6970         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
6971         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
6972         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
6973         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
6974         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
6975 #else
6976         usbp->st_atime = sbp->st_atime;
6977         usbp->st_atimensec = sbp->st_atimensec;
6978         usbp->st_mtime = sbp->st_mtime;
6979         usbp->st_mtimensec = sbp->st_mtimensec;
6980         usbp->st_ctime = sbp->st_ctime;
6981         usbp->st_ctimensec = sbp->st_ctimensec;
6982 #endif
6983         usbp->st_size = sbp->st_size;
6984         usbp->st_blocks = sbp->st_blocks;
6985         usbp->st_blksize = sbp->st_blksize;
6986         usbp->st_flags = sbp->st_flags;
6987         usbp->st_gen = sbp->st_gen;
6988         usbp->st_lspare = sbp->st_lspare;
6989         usbp->st_qspare[0] = sbp->st_qspare[0];
6990         usbp->st_qspare[1] = sbp->st_qspare[1];
6991 }
6992
6993 /*
6994  * copy stat64 structure into user_stat64 structure.
6995  */
6996 void munge_stat64(struct stat64 *sbp, struct user_stat64 *usbp)
6997 {
6998         bzero(usbp, sizeof(struct user_stat));
6999
7000         usbp->st_dev = sbp->st_dev;
7001         usbp->st_ino = sbp->st_ino;
7002         usbp->st_mode = sbp->st_mode;
7003         usbp->st_nlink = sbp->st_nlink;
7004         usbp->st_uid = sbp->st_uid;
7005         usbp->st_gid = sbp->st_gid;
7006         usbp->st_rdev = sbp->st_rdev;
7007 #ifndef _POSIX_C_SOURCE
7008         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
7009         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
7010         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
7011         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
7012         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
7013         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
7014         usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
7015         usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
7016 #else
7017         usbp->st_atime = sbp->st_atime;
7018         usbp->st_atimensec = sbp->st_atimensec;
7019         usbp->st_mtime = sbp->st_mtime;
7020         usbp->st_mtimensec = sbp->st_mtimensec;
7021         usbp->st_ctime = sbp->st_ctime;
7022         usbp->st_ctimensec = sbp->st_ctimensec;
7023         usbp->st_birthtime = sbp->st_birthtime;
7024         usbp->st_birthtimensec = sbp->st_birthtimensec;
7025 #endif
7026         usbp->st_size = sbp->st_size;
7027         usbp->st_blocks = sbp->st_blocks;
7028         usbp->st_blksize = sbp->st_blksize;
7029         usbp->st_flags = sbp->st_flags;
7030         usbp->st_gen = sbp->st_gen;
7031         usbp->st_lspare = sbp->st_lspare;
7032         usbp->st_qspare[0] = sbp->st_qspare[0];
7033         usbp->st_qspare[1] = sbp->st_qspare[1];
7034 }