bsd/vfs/vfs_syscalls.c

   1 /*
   2  * Copyright (c) 1995-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1989, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  * (c) UNIX System Laboratories, Inc.
  32  * All or some portions of this file are derived from material licensed
  33  * to the University of California by American Telephone and Telegraph
  34  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  35  * the permission of UNIX System Laboratories, Inc.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. All advertising materials mentioning features or use of this software
  46  *    must display the following acknowledgement:
  47  *      This product includes software developed by the University of
  48  *      California, Berkeley and its contributors.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  *
  65  *      @(#)vfs_syscalls.c      8.41 (Berkeley) 6/15/95
  66  */
  67 /*
  68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  69  * support for mandatory and extensible security protections.  This notice
  70  * is included in support of clause 2.2 (b) of the Apple Public License,
  71  * Version 2.0.
  72  */
  73
  74 #include <sys/param.h>
  75 #include <sys/systm.h>
  76 #include <sys/namei.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/kernel.h>
  79 #include <sys/file_internal.h>
  80 #include <sys/stat.h>
  81 #include <sys/vnode_internal.h>
  82 #include <sys/mount_internal.h>
  83 #include <sys/proc_internal.h>
  84 #include <sys/kauth.h>
  85 #include <sys/uio_internal.h>
  86 #include <sys/malloc.h>
  87 #include <sys/mman.h>
  88 #include <sys/dirent.h>
  89 #include <sys/attr.h>
  90 #include <sys/sysctl.h>
  91 #include <sys/ubc.h>
  92 #include <sys/quota.h>
  93 #include <sys/kdebug.h>
  94 #include <sys/fsevents.h>
  95 #include <sys/sysproto.h>
  96 #include <sys/xattr.h>
  97 #include <sys/fcntl.h>
  98 #include <sys/fsctl.h>
  99 #include <sys/ubc_internal.h>
 100 #include <sys/disk.h>
 101 #include <machine/cons.h>
 102 #include <machine/limits.h>
 103 #include <miscfs/specfs/specdev.h>
 104 #include <miscfs/union/union.h>
 105
 106 #include <security/audit/audit.h>
 107 #include <bsm/audit_kevents.h>
 108
 109 #include <mach/mach_types.h>
 110 #include <kern/kern_types.h>
 111 #include <kern/kalloc.h>
 112
 113 #include <vm/vm_pageout.h>
 114
 115 #include <libkern/OSAtomic.h>
 116 #include <pexpert/pexpert.h>
 117
 118 #if CONFIG_MACF
 119 #include <security/mac.h>
 120 #include <security/mac_framework.h>
 121 #endif
 122
 123 #if CONFIG_FSE
     /*
      * GET_PATH(x) / RELEASE_PATH(x): obtain a scratch buffer into the lvalue
      * x and give it back again.  With CONFIG_FSE the buffer comes from
      * get_pathbuff() and is handed back with release_pathbuff().
      *
      * NOTE: every expansion below already ends in ';', so "GET_PATH(p);"
      * expands to a statement followed by an empty statement.  Do not use
      * these macros as the unbraced body of an if that has an else.
      */
 124 #define GET_PATH(x) \
 125         (x) = get_pathbuff();
 126 #define RELEASE_PATH(x) \
 127         release_pathbuff(x);
 128 #else
     /*
      * Without CONFIG_FSE the buffer is MAXPATHLEN bytes obtained with
      * MALLOC_ZONE() (type M_NAMEI, M_WAITOK) and released with FREE_ZONE()
      * using the same size and type.
      */
 129 #define GET_PATH(x)     \
 130         MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
 131 #define RELEASE_PATH(x) \
 132         FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
 133 #endif /* CONFIG_FSE */
 134
 135 /*
      * Argument bundle for the checkdirs iteration: a pair of vnodes.
      * NOTE(review): presumably passed as the untyped 'arg' of
      * checkdirs_callback(); confirm in checkdirs(), which is not visible
      * in this part of the file.
      */
 136 struct cdirargs {
 137         vnode_t olddp;	/* presumably the directory vnode being replaced -- TODO confirm */
 138         vnode_t newdp;	/* presumably the vnode that replaces it -- TODO confirm */
 139 };
 140 /* Per-process callback for the checkdirs iteration; 'arg' is untyped. */
 141 static int checkdirs_callback(proc_t p, void * arg);
 142
     /*
      * Forward declarations.  Everything marked static is private to this
      * file; enablequotas() is the one helper in this group declared without
      * static.
      */
 143 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
 144 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
 145 void enablequotas(struct mount *mp, vfs_context_t ctx);
 146 static int getfsstat_callback(mount_t mp, void * arg);
 147 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
 148 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
 149 static int sync_callback(mount_t, void *);
 150 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
 151                         user_addr_t bufp, int *sizep, boolean_t is_64_bit,
 152                                                 boolean_t partial_copy);
 153 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
 154                         user_addr_t bufp);
 155 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
     /*
      * Global function-pointer hook, declared here without an initializer.
      * NOTE(review): presumably installed by the union filesystem code --
      * confirm against its users.
      */
 156 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
 157
     /* Non-static entry points, each marked __private_extern__. */
 158 __private_extern__
 159 int sync_internal(void);
 160
 161 __private_extern__
 162 int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *);
 163
 164 __private_extern__
 165 int unlink1(vfs_context_t, struct nameidata *, int);
 166
 167
 168 #ifdef __APPLE_API_OBSOLETE
     /*
      * Argument structures and prototypes for the obsolete fstatv(),
      * lstatv(), mkcomplex() and statv() entry points.  They are only
      * declared when __APPLE_API_OBSOLETE is defined.  Each structure is the
      * 'uap' argument of the identically named function declared below.
      */
     /* Arguments for fstatv(): the target is named by file descriptor. */
 169 struct fstatv_args {
 170        int fd;                  /* file descriptor of the target file */
 171        struct vstat *vsb;       /* vstat structure for returned info  */
 172 };
     /* Arguments for lstatv(): the target is named by path. */
 173 struct lstatv_args {
 174        const char *path;        /* pathname of the target file       */
 175        struct vstat *vsb;       /* vstat structure for returned info */
 176 };
     /* Arguments for mkcomplex(): path, mode and format of the new file. */
 177 struct mkcomplex_args {
 178         const char *path;       /* pathname of the file to be created */
 179                 mode_t mode;            /* access mode for the newly created file */
 180         u_int32_t type;         /* format of the complex file */
 181 };
     /* Arguments for statv(): the target is named by path. */
 182 struct statv_args {
 183         const char *path;       /* pathname of the target file       */
 184         struct vstat *vsb;      /* vstat structure for returned info */
 185 };
 186
     /* Prototypes; all four share the (proc, args, retval) calling shape. */
 187 int fstatv(proc_t p, struct fstatv_args *uap, int32_t *retval);
 188 int lstatv(proc_t p, struct lstatv_args *uap, int32_t *retval);
 189 int mkcomplex(proc_t p, struct mkcomplex_args *uap, int32_t *retval);
 190 int statv(proc_t p, struct statv_args *uap, int32_t *retval);
 191
 192 #endif /* __APPLE_API_OBSOLETE */
 193
 194 /*
 195  * Generation counter, incremented each time a mount or unmount
 196  * operation occurs.  It is used to invalidate the cached value of the
 197  * rootvp in the mount structure utilized by cache_lookup_path.
      *
      * __mac_mount() increments it between name_cache_lock() and
      * name_cache_unlock() once a new mount has been attached to the
      * covered vnode.
 198  */
 199 uint32_t mount_generation = 0;
 200
 201 /* Counts the number of mount and unmount operations. */
 202 unsigned int vfs_nummntops=0;
 203
     /* Symbols defined outside this file. */
 204 extern struct fileops vnops;
 205 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
 206
 207
 208 /*
 209  * Virtual File System System Calls
 210  */
 211
 212 /*
 213  * Mount a file system.
      *
      * Thin wrapper around __mac_mount(): copies the caller's type, path,
      * flags and data into a struct __mac_mount_args, sets mac_p to
      * USER_ADDR_NULL so that no MAC label is supplied, and hands the request
      * on.
      *
      * Parameters:    p                        Process requesting the mount
      *                uap                      User argument descriptor; the
      *                                         type, path, flags and data
      *                                         members are read
      *                retval                   Passed through to __mac_mount()
      *                                         unchanged (it is forwarded even
      *                                         though it is marked __unused)
      *
      * Returns:       The value returned by __mac_mount(): 0 on success,
      *                non-zero otherwise.
 214  */
 215 /* ARGSUSED */
 216 int
 217 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
 218 {
 219         struct __mac_mount_args muap;
 220
         /* Field-by-field copy of the mount(2) arguments. */
 221         muap.type = uap->type;
 222         muap.path = uap->path;
 223         muap.flags = uap->flags;
 224         muap.data = uap->data;
 225         muap.mac_p = USER_ADDR_NULL;	/* this entry point carries no MAC label */
 226         return (__mac_mount(p, &muap, retval));
 227 }
 228
 229 /*
 230  * __mac_mount:
 231  *      Mount a file system taking into account MAC label behavior.
 232  *      See mount(2) man page for more information
 233  *
 234  * Parameters:    p                        Process requesting the mount
 235  *                uap                      User argument descriptor (see below)
 236  *                retval                   (ignored)
 237  *
 238  * Indirect:      uap->type                Filesystem type
 239  *                uap->path                Path to mount
 240  *                uap->data                Mount arguments
 241  *                uap->mac_p               MAC info
 242  *                uap->flags               Mount flags
 243  *
 244  *
 245  * Returns:        0                       Success
 246  *                !0                       Failure (non-zero error code)
 247  */
 248 int
 249 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
 250 {
 251         struct vnode *vp, *pvp;
 252         struct vnode *devvp = NULLVP;
 253         struct vnode *device_vnode = NULLVP;
 254 #if CONFIG_MACF
 255         struct vnode *rvp;
 256 #endif
 257         struct mount *mp;
 258         struct vfstable *vfsp = (struct vfstable *)0;
 259         int error, flag = 0;
 260         struct vnode_attr va;
 261         vfs_context_t ctx = vfs_context_current();
 262         struct nameidata nd;
 263         struct nameidata nd1;
 264         char fstypename[MFSNAMELEN];
 265         size_t dummy=0;
 266         user_addr_t devpath = USER_ADDR_NULL;
 267         user_addr_t fsmountargs =  uap->data;
 268         int ronly = 0;
 269         int mntalloc = 0;
 270         boolean_t vfsp_ref = FALSE;
 271         mode_t accessmode;
 272         boolean_t is_64bit;
 273         boolean_t is_rwlock_locked = FALSE;
 274         boolean_t did_rele = FALSE;
 275         boolean_t have_usecount = FALSE;
 276
 277         AUDIT_ARG(fflags, uap->flags);
 278
 279         is_64bit = proc_is64bit(p);
 280
 281         /*
 282          * Get vnode to be covered
 283          */
 284         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT,
 285                    UIO_USERSPACE, uap->path, ctx);
 286         error = namei(&nd);
 287         if (error)
 288                 return (error);
 289         vp = nd.ni_vp;
 290         pvp = nd.ni_dvp;
 291
 292         if ((vp->v_flag & VROOT) &&
 293                 (vp->v_mount->mnt_flag & MNT_ROOTFS))
 294                         uap->flags |= MNT_UPDATE;
 295
 296         error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
 297         if (error)
 298                 goto out1;
 299
 300         if (uap->flags & MNT_UPDATE) {
 301                 if ((vp->v_flag & VROOT) == 0) {
 302                         error = EINVAL;
 303                         goto out1;
 304                 }
 305                 mp = vp->v_mount;
 306
 307                 /* unmount in progress return error */
 308                 mount_lock_spin(mp);
 309                 if (mp->mnt_lflag & MNT_LUNMOUNT) {
 310                         mount_unlock(mp);
 311                         error = EBUSY;
 312                         goto out1;
 313                 }
 314                 mount_unlock(mp);
 315                 lck_rw_lock_exclusive(&mp->mnt_rwlock);
 316                 is_rwlock_locked = TRUE;
 317                 /*
 318                  * We only allow the filesystem to be reloaded if it
 319                  * is currently mounted read-only.
 320                  */
 321                 if ((uap->flags & MNT_RELOAD) &&
 322                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
 323                         error = ENOTSUP;
 324                         goto out1;
 325                 }
 326                 /*
 327                  * Only root, or the user that did the original mount is
 328                  * permitted to update it.
 329                  */
 330                 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 331                     (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
 332                         goto out1;
 333                 }
 334 #if CONFIG_MACF
 335                 error = mac_mount_check_remount(ctx, mp);
 336                 if (error != 0) {
 337                         lck_rw_done(&mp->mnt_rwlock);
 338                         goto out1;
 339                 }
 340 #endif
 341                 /*
 342                  * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
 343                  * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
 344                  */
 345                 if (suser(vfs_context_ucred(ctx), NULL)) {
 346                         uap->flags |= MNT_NOSUID | MNT_NODEV;
 347                         if (mp->mnt_flag & MNT_NOEXEC)
 348                                 uap->flags |= MNT_NOEXEC;
 349                 }
 350                 flag = mp->mnt_flag;
 351
 352                 mp->mnt_flag |=
 353                     uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
 354
 355                 vfsp = mp->mnt_vtable;
 356                 goto update;
 357         }
 358         /*
 359          * If the user is not root, ensure that they own the directory
 360          * onto which we are attempting to mount.
 361          */
 362         VATTR_INIT(&va);
 363         VATTR_WANTED(&va, va_uid);
 364         if ((error = vnode_getattr(vp, &va, ctx)) ||
 365             (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 366              (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))) {
 367                 goto out1;
 368         }
 369         /*
 370          * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
 371          * MNT_NOEXEC if mount point is already MNT_NOEXEC.
 372          */
 373         if (suser(vfs_context_ucred(ctx), NULL)) {
 374                 uap->flags |= MNT_NOSUID | MNT_NODEV;
 375                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
 376                         uap->flags |= MNT_NOEXEC;
 377         }
 378         if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
 379                 goto out1;
 380
 381         if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
 382                 goto out1;
 383
 384         if (vp->v_type != VDIR) {
 385                 error = ENOTDIR;
 386                 goto out1;
 387         }
 388
 389         /* XXXAUDIT: Should we capture the type on the error path as well? */
 390         AUDIT_ARG(text, fstypename);
 391         mount_list_lock();
 392         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 393                 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
 394                         vfsp->vfc_refcount++;
 395                         vfsp_ref = TRUE;
 396                         break;
 397                 }
 398         mount_list_unlock();
 399         if (vfsp == NULL) {
 400                 error = ENODEV;
 401                 goto out1;
 402         }
 403 #if CONFIG_MACF
 404         error = mac_mount_check_mount(ctx, vp,
 405             &nd.ni_cnd, vfsp->vfc_name);
 406         if (error != 0)
 407                 goto out1;
 408 #endif
 409         if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
 410                 error = EBUSY;
 411                 goto out1;
 412         }
 413         vnode_lock_spin(vp);
 414         SET(vp->v_flag, VMOUNT);
 415         vnode_unlock(vp);
 416
 417         /*
 418          * Allocate and initialize the filesystem.
 419          */
 420         MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
 421                 M_MOUNT, M_WAITOK);
 422         bzero((char *)mp, (u_int32_t)sizeof(struct mount));
 423         mntalloc = 1;
 424
 425         /* Initialize the default IO constraints */
 426         mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 427         mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 428         mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
 429         mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
 430         mp->mnt_devblocksize = DEV_BSIZE;
 431         mp->mnt_alignmentmask = PAGE_MASK;
 432         mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
 433         mp->mnt_ioscale = 1;
 434         mp->mnt_ioflags = 0;
 435         mp->mnt_realrootvp = NULLVP;
 436         mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
 437
 438         TAILQ_INIT(&mp->mnt_vnodelist);
 439         TAILQ_INIT(&mp->mnt_workerqueue);
 440         TAILQ_INIT(&mp->mnt_newvnodes);
 441         mount_lock_init(mp);
 442         lck_rw_lock_exclusive(&mp->mnt_rwlock);
 443         is_rwlock_locked = TRUE;
 444         mp->mnt_op = vfsp->vfc_vfsops;
 445         mp->mnt_vtable = vfsp;
 446         //mp->mnt_stat.f_type = vfsp->vfc_typenum;
 447         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 448         strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
 449         strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
 450         mp->mnt_vnodecovered = vp;
 451         mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
 452         mp->mnt_devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
 453
 454         /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
 455         vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
 456
 457 update:
 458         /*
 459          * Set the mount level flags.
 460          */
 461         if (uap->flags & MNT_RDONLY)
 462                 mp->mnt_flag |= MNT_RDONLY;
 463         else if (mp->mnt_flag & MNT_RDONLY)
 464                 mp->mnt_kern_flag |= MNTK_WANTRDWR;
 465         mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 466                           MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 467                           MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 468                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
 469         mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 470                                       MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 471                                       MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 472                                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
 473
 474 #if CONFIG_MACF
 475         if (uap->flags & MNT_MULTILABEL) {
 476                 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
 477                         error = EINVAL;
 478                         goto out1;
 479                 }
 480                 mp->mnt_flag |= MNT_MULTILABEL;
 481         }
 482 #endif
 483
 484         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
 485                 if (is_64bit) {
 486                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
 487                                 goto out1;
 488                         fsmountargs += sizeof(devpath);
 489                 } else {
 490                         user32_addr_t tmp;
 491                         if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
 492                                 goto out1;
 493                         /* munge into LP64 addr */
 494                         devpath = CAST_USER_ADDR_T(tmp);
 495                         fsmountargs += sizeof(tmp);
 496                 }
 497
 498                 /* if it is not update and device name needs to be parsed */
 499                 if ((devpath)) {
 500                         NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
 501                         if ( (error = namei(&nd1)) )
 502                                 goto out1;
 503
 504                         strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN);
 505                         devvp = nd1.ni_vp;
 506
 507                         nameidone(&nd1);
 508
 509                         if (devvp->v_type != VBLK) {
 510                                 error = ENOTBLK;
 511                                 goto out2;
 512                         }
 513                         if (major(devvp->v_rdev) >= nblkdev) {
 514                                 error = ENXIO;
 515                                 goto out2;
 516                         }
 517                         /*
 518                         * If mount by non-root, then verify that user has necessary
 519                         * permissions on the device.
 520                         */
 521                         if (suser(vfs_context_ucred(ctx), NULL) != 0) {
 522                                 accessmode = KAUTH_VNODE_READ_DATA;
 523                                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 524                                         accessmode |= KAUTH_VNODE_WRITE_DATA;
 525                                 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
 526                                         goto out2;
 527                         }
 528                 }
 529                 if (devpath && ((uap->flags & MNT_UPDATE) == 0)) {
 530                         if ( (error = vnode_ref(devvp)) )
 531                                 goto out2;
 532                         /*
 533                         * Disallow multiple mounts of the same device.
 534                         * Disallow mounting of a device that is currently in use
 535                         * (except for root, which might share swap device for miniroot).
 536                         * Flush out any old buffers remaining from a previous use.
 537                         */
 538                         if ( (error = vfs_mountedon(devvp)) )
 539                                 goto out3;
 540
 541                         if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
 542                                 error = EBUSY;
 543                                 goto out3;
 544                         }
 545                         if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
 546                                 error = ENOTBLK;
 547                                 goto out3;
 548                         }
 549                         if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
 550                                 goto out3;
 551
 552                         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 553 #if CONFIG_MACF
 554                         error = mac_vnode_check_open(ctx,
 555                             devvp,
 556                             ronly ? FREAD : FREAD|FWRITE);
 557                         if (error)
 558                                 goto out3;
 559 #endif /* MAC */
 560                         if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
 561                                 goto out3;
 562
 563                         mp->mnt_devvp = devvp;
 564                         device_vnode = devvp;
 565                 } else {
 566                         if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
 567                                 dev_t dev;
 568                                 int maj;
 569                                 /*
 570                                  * If upgrade to read-write by non-root, then verify
 571                                  * that user has necessary permissions on the device.
 572                                  */
 573                                 device_vnode = mp->mnt_devvp;
 574
 575                                 if (device_vnode) {
 576                                         vnode_getalways(device_vnode);
 577
 578                                         if (suser(vfs_context_ucred(ctx), NULL)) {
 579                                                 if ((error = vnode_authorize(device_vnode, NULL,
 580                                                                                 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) {
 581                                                         vnode_put(device_vnode);
 582                                                         goto out2;
 583                                                 }
 584                                         }
 585
 586                                         /* Tell the device that we're upgrading */
 587                                         dev = (dev_t)device_vnode->v_rdev;
 588                                         maj = major(dev);
 589
 590                                         if ((u_int)maj >= (u_int)nblkdev)
 591                                                 panic("Volume mounted on a device with invalid major number.\n");
 592
 593                                         error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
 594
 595                                         vnode_put(device_vnode);
 596                                         if (error != 0) {
 597                                                 goto out2;
 598                                         }
 599                                 }
 600                         }
 601                         device_vnode = NULLVP;
 602                 }
 603         }
 604 #if CONFIG_MACF
 605         if ((uap->flags & MNT_UPDATE) == 0) {
 606                 mac_mount_label_init(mp);
 607                 mac_mount_label_associate(ctx, mp);
 608         }
 609         if (uap->mac_p != USER_ADDR_NULL) {
 610                 struct user_mac mac;
 611                 char *labelstr = NULL;
 612                 size_t ulen = 0;
 613
 614                 if ((uap->flags & MNT_UPDATE) != 0) {
 615                         error = mac_mount_check_label_update(
 616                             ctx, mp);
 617                         if (error != 0)
 618                                 goto out3;
 619                 }
 620                 if (is_64bit) {
 621                         error = copyin(uap->mac_p, &mac, sizeof(mac));
 622                 } else {
 623                         struct mac mac32;
 624                         error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 625                         mac.m_buflen = mac32.m_buflen;
 626                         mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
 627                 }
 628                 if (error != 0)
 629                         goto out3;
 630                 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
 631                     (mac.m_buflen < 2)) {
 632                         error = EINVAL;
 633                         goto out3;
 634                 }
 635                 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
 636                 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
 637                 if (error != 0) {
 638                         FREE(labelstr, M_MACTEMP);
 639                         goto out3;
 640                 }
 641                 AUDIT_ARG(mac_string, labelstr);
 642                 error = mac_mount_label_internalize(mp->mnt_mntlabel, labelstr);
 643                 FREE(labelstr, M_MACTEMP);
 644                 if (error != 0)
 645                         goto out3;
 646         }
 647 #endif
 648         if (device_vnode != NULL) {
 649                 VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&mp->mnt_devbsdunit, 0, NULL);
 650                 mp->mnt_devbsdunit %= LOWPRI_MAX_NUM_DEV;
 651         }
 652
 653         /*
 654          * Mount the filesystem.
 655          */
 656         error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
 657
 658         if (uap->flags & MNT_UPDATE) {
 659                 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 660                         mp->mnt_flag &= ~MNT_RDONLY;
 661                 mp->mnt_flag &=~
 662                     (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
 663                 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 664                 if (error)
 665                         mp->mnt_flag = flag;
 666                 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
 667                 lck_rw_done(&mp->mnt_rwlock);
 668                 is_rwlock_locked = FALSE;
 669                 if (!error)
 670                         enablequotas(mp, ctx);
 671                 goto out2;
 672         }
 673         /*
 674          * Put the new filesystem on the mount list after root.
 675          */
 676         if (error == 0) {
 677                 struct vfs_attr vfsattr;
 678 #if CONFIG_MACF
 679                 if (vfs_flags(mp) & MNT_MULTILABEL) {
 680                         error = VFS_ROOT(mp, &rvp, ctx);
 681                         if (error) {
 682                                 printf("%s() VFS_ROOT returned %d\n", __func__, error);
 683                                 goto out3;
 684                         }
 685                         error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
 686                         /*
 687                          * drop reference provided by VFS_ROOT
 688                          */
 689                         vnode_put(rvp);
 690
 691                         if (error)
 692                                 goto out3;
 693                 }
 694 #endif  /* MAC */
 695
 696                 vnode_lock_spin(vp);
 697                 CLR(vp->v_flag, VMOUNT);
 698                 vp->v_mountedhere = mp;
 699                 vnode_unlock(vp);
 700
 701                 /*
 702                  * taking the name_cache_lock exclusively will
 703                  * insure that everyone is out of the fast path who
 704                  * might be trying to use a now stale copy of
 705                  * vp->v_mountedhere->mnt_realrootvp
 706                  * bumping mount_generation causes the cached values
 707                  * to be invalidated
 708                  */
 709                 name_cache_lock();
 710                 mount_generation++;
 711                 name_cache_unlock();
 712
 713                 error = vnode_ref(vp);
 714                 if (error != 0) {
 715                         goto out4;
 716                 }
 717
 718                 have_usecount = TRUE;
 719
 720                 error = checkdirs(vp, ctx);
 721                 if (error != 0)  {
 722                         /* Unmount the filesystem as cdir/rdirs cannot be updated */
 723                         goto out4;
 724                 }
 725                 /*
 726                  * there is no cleanup code here so I have made it void
 727                  * we need to revisit this
 728                  */
 729                 (void)VFS_START(mp, 0, ctx);
 730
 731                 error = mount_list_add(mp);
 732                 if (error != 0) {
 733                         goto out4;
 734                 }
 735
 736                 lck_rw_done(&mp->mnt_rwlock);
 737                 is_rwlock_locked = FALSE;
 738
 739                 /* Check if this mounted file system supports EAs or named streams. */
 740                 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
 741                 VFSATTR_INIT(&vfsattr);
 742                 VFSATTR_WANTED(&vfsattr, f_capabilities);
 743                 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
 744                     vfs_getattr(mp, &vfsattr, ctx) == 0 &&
 745                     VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
 746                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
 747                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
 748                                 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 749                         }
 750 #if NAMEDSTREAMS
 751                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
 752                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
 753                                 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
 754                         }
 755 #endif
 756                         /* Check if this file system supports path from id lookups. */
 757                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
 758                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
 759                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 760                         } else if (mp->mnt_flag & MNT_DOVOLFS) {
 761                                 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
 762                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 763                         }
 764                 }
 765                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
 766                         mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 767                 }
 768                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
 769                         mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
 770                 }
 771                 /* increment the operations count */
 772                 OSAddAtomic(1, &vfs_nummntops);
 773                 enablequotas(mp, ctx);
 774
 775                 if (device_vnode) {
 776                         device_vnode->v_specflags |= SI_MOUNTEDON;
 777
 778                         /*
 779                          *   cache the IO attributes for the underlying physical media...
 780                          *   an error return indicates the underlying driver doesn't
 781                          *   support all the queries necessary... however, reasonable
 782                          *   defaults will have been set, so no reason to bail or care
 783                          */
 784                         vfs_init_io_attributes(device_vnode, mp);
 785                 }
 786
 787                 /* Now that mount is setup, notify the listeners */
 788                 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
 789         } else {
 790                 vnode_lock_spin(vp);
 791                 CLR(vp->v_flag, VMOUNT);
 792                 vnode_unlock(vp);
 793                 mount_list_lock();
 794                 mp->mnt_vtable->vfc_refcount--;
 795                 mount_list_unlock();
 796
 797                 if (device_vnode ) {
 798                         vnode_rele(device_vnode);
 799                         VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
 800                 }
 801                 lck_rw_done(&mp->mnt_rwlock);
 802                 is_rwlock_locked = FALSE;
 803                 mount_lock_destroy(mp);
 804 #if CONFIG_MACF
 805                 mac_mount_label_destroy(mp);
 806 #endif
 807                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 808         }
 809         nameidone(&nd);
 810
 811         /*
 812          * drop I/O count on covered 'vp' and
 813          * on the device vp if there was one
 814          */
 815         if (devpath && devvp)
 816                 vnode_put(devvp);
 817         vnode_put(vp);
 818
 819         /* Note that we've changed something in the parent directory */
 820         post_event_if_success(pvp, error, NOTE_WRITE);
 821         vnode_put(pvp);
 822
 823         return(error);
 824
 825 out4:
 826         (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
 827         if (device_vnode != NULLVP) {
 828                 vnode_rele(device_vnode);
 829                 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
 830                        ctx);
 831                 did_rele = TRUE;
 832         }
 833         vnode_lock_spin(vp);
 834         vp->v_mountedhere = (mount_t) 0;
 835         vnode_unlock(vp);
 836
 837         if (have_usecount) {
 838                 vnode_rele(vp);
 839         }
 840 out3:
 841         if (devpath && ((uap->flags & MNT_UPDATE) == 0) && (!did_rele))
 842                 vnode_rele(devvp);
 843 out2:
 844         if (devpath && devvp)
 845                 vnode_put(devvp);
 846 out1:
 847         /* Release mnt_rwlock only when it was taken */
 848         if (is_rwlock_locked == TRUE) {
 849                 lck_rw_done(&mp->mnt_rwlock);
 850         }
 851         if (mntalloc) {
 852 #if CONFIG_MACF
 853                 mac_mount_label_destroy(mp);
 854 #endif
 855                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 856         }
 857
 858         if (vfsp_ref) {
 859                 mount_list_lock();
 860                 vfsp->vfc_refcount--;
 861                 mount_list_unlock();
 862         }
 863         vnode_put(vp);
 864         vnode_put(pvp);
 865         nameidone(&nd);
 866
 867         return(error);
 868 }
 869
 870 void
 871 enablequotas(struct mount *mp, vfs_context_t ctx)
 872 {
 873         struct nameidata qnd;
 874         int type;
 875         char qfpath[MAXPATHLEN];
 876         const char *qfname = QUOTAFILENAME;
 877         const char *qfopsname = QUOTAOPSNAME;
 878         const char *qfextension[] = INITQFNAMES;
 879
 880         /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
 881         if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
 882                 return;
 883         }
 884         /*
 885          * Enable filesystem disk quotas if necessary.
 886          * We ignore errors as this should not interfere with final mount
 887          */
 888         for (type=0; type < MAXQUOTAS; type++) {
 889                 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
 890                 NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(qfpath), ctx);
 891                 if (namei(&qnd) != 0)
 892                         continue;           /* option file to trigger quotas is not present */
 893                 vnode_put(qnd.ni_vp);
 894                 nameidone(&qnd);
 895                 snprintf(qfpath, sizeof(qfpath),  "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
 896
 897                 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
 898         }
 899         return;
 900 }
 901
 902
 903 static int
 904 checkdirs_callback(proc_t p, void * arg)
 905 {
 906         struct cdirargs * cdrp = (struct cdirargs * )arg;
 907         vnode_t olddp = cdrp->olddp;
 908         vnode_t newdp = cdrp->newdp;
 909         struct filedesc *fdp;
 910         vnode_t tvp;
 911         vnode_t fdp_cvp;
 912         vnode_t fdp_rvp;
 913         int cdir_changed = 0;
 914         int rdir_changed = 0;
 915
 916         /*
 917          * XXX Also needs to iterate each thread in the process to see if it
 918          * XXX is using a per-thread current working directory, and, if so,
 919          * XXX update that as well.
 920          */
 921
 922         proc_fdlock(p);
 923         fdp = p->p_fd;
 924         if (fdp == (struct filedesc *)0) {
 925                 proc_fdunlock(p);
 926                 return(PROC_RETURNED);
 927         }
 928         fdp_cvp = fdp->fd_cdir;
 929         fdp_rvp = fdp->fd_rdir;
 930         proc_fdunlock(p);
 931
 932         if (fdp_cvp == olddp) {
 933                 vnode_ref(newdp);
 934                 tvp = fdp->fd_cdir;
 935                 fdp_cvp = newdp;
 936                 cdir_changed = 1;
 937                 vnode_rele(tvp);
 938         }
 939         if (fdp_rvp == olddp) {
 940                 vnode_ref(newdp);
 941                 tvp = fdp->fd_rdir;
 942                 fdp_rvp = newdp;
 943                 rdir_changed = 1;
 944                 vnode_rele(tvp);
 945         }
 946         if (cdir_changed || rdir_changed) {
 947                 proc_fdlock(p);
 948                 fdp->fd_cdir = fdp_cvp;
 949                 fdp->fd_rdir = fdp_rvp;
 950                 proc_fdunlock(p);
 951         }
 952         return(PROC_RETURNED);
 953 }
 954
 955
 956
 957 /*
 958  * Scan all active processes to see if any of them have a current
 959  * or root directory onto which the new filesystem has just been
 960  * mounted. If so, replace them with the new mount point.
 961  */
 962 static int
 963 checkdirs(vnode_t olddp, vfs_context_t ctx)
 964 {
 965         vnode_t newdp;
 966         vnode_t tvp;
 967         int err;
 968         struct cdirargs cdr;
 969         struct uthread * uth = get_bsdthread_info(current_thread());
 970
 971         if (olddp->v_usecount == 1)
 972                 return(0);
 973         if (uth != (struct uthread *)0)
 974                 uth->uu_notrigger = 1;
 975         err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
 976         if (uth != (struct uthread *)0)
 977                 uth->uu_notrigger = 0;
 978
 979         if (err != 0) {
 980 #if DIAGNOSTIC
 981                 panic("mount: lost mount: error %d", err);
 982 #endif
 983                 return(err);
 984         }
 985
 986         cdr.olddp = olddp;
 987         cdr.newdp = newdp;
 988         /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
 989         proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
 990
 991         if (rootvnode == olddp) {
 992                 vnode_ref(newdp);
 993                 tvp = rootvnode;
 994                 rootvnode = newdp;
 995                 vnode_rele(tvp);
 996         }
 997
 998         vnode_put(newdp);
 999         return(0);
1000 }
1001
1002 /*
1003  * Unmount a file system.
1004  *
1005  * Note: unmount takes a path to the vnode mounted on as argument,
1006  * not special file (as before).
1007  */
1008 /* ARGSUSED */
1009 int
1010 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1011 {
1012         vnode_t vp;
1013         struct mount *mp;
1014         int error;
1015         struct nameidata nd;
1016         vfs_context_t ctx = vfs_context_current();
1017
1018         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1019                 UIO_USERSPACE, uap->path, ctx);
1020         error = namei(&nd);
1021         if (error)
1022                 return (error);
1023         vp = nd.ni_vp;
1024         mp = vp->v_mount;
1025         nameidone(&nd);
1026
1027 #if CONFIG_MACF
1028         error = mac_mount_check_umount(ctx, mp);
1029         if (error != 0) {
1030                 vnode_put(vp);
1031                 return (error);
1032         }
1033 #endif
1034         /*
1035          * Must be the root of the filesystem
1036          */
1037         if ((vp->v_flag & VROOT) == 0) {
1038                 vnode_put(vp);
1039                 return (EINVAL);
1040         }
1041         mount_ref(mp, 0);
1042         vnode_put(vp);
1043         /* safedounmount consumes the mount ref */
1044         return (safedounmount(mp, uap->flags, ctx));
1045 }
1046
1047 int
1048 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1049 {
1050         mount_t mp;
1051
1052         mp = mount_list_lookupby_fsid(fsid, 0, 1);
1053         if (mp == (mount_t)0) {
1054                 return(ENOENT);
1055         }
1056         mount_ref(mp, 0);
1057         mount_iterdrop(mp);
1058         /* safedounmount consumes the mount ref */
1059         return(safedounmount(mp, flags, ctx));
1060 }
1061
1062
1063 /*
1064  * The mount struct comes with a mount ref which will be consumed.
1065  * Do the actual file system unmount, prevent some common foot shooting.
1066  */
1067 int
1068 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1069 {
1070         int error;
1071         proc_t p = vfs_context_proc(ctx);
1072
1073         /*
1074          * Only root, or the user that did the original mount is
1075          * permitted to unmount this filesystem.
1076          */
1077         if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1078             (error = suser(kauth_cred_get(), &p->p_acflag)))
1079                 goto out;
1080
1081         /*
1082          * Don't allow unmounting the root file system.
1083          */
1084         if (mp->mnt_flag & MNT_ROOTFS) {
1085                 error = EBUSY; /* the root is always busy */
1086                 goto out;
1087         }
1088
1089         return (dounmount(mp, flags, 1, ctx));
1090
1091 out:
1092         mount_drop(mp, 0);
1093         return(error);
1094 }
1095
/*
 * Do the actual file system unmount.
 *
 * Parameters:
 *      mp              mount to tear down
 *      flags           unmount flags; MNT_FORCE selects a forced unmount and
 *                      the flags are passed through to VFS_UNMOUNT
 *      withref         non-zero when the caller holds a mount ref on mp; that
 *                      ref is dropped by this function
 *      ctx             context passed to VFS_SYNC, VFS_UNMOUNT and VNOP_CLOSE
 *
 * Returns:     0               Success; mp has been freed, or the covered
 *                              vnode has been marked VL_MOUNTDEAD so that the
 *                              last mount_dropcrossref() frees it
 *              EBUSY           Another unmount of mp was already in progress
 *      VFS_SYNC:???            (non-forced unmount only)
 *      vflush:???              (non-forced unmount only)
 *      VFS_UNMOUNT:???
 *
 * On the VFS_SYNC / vflush / VFS_UNMOUNT failure paths MNTK_UNMOUNT,
 * MNT_LUNMOUNT and MNT_LFORCE are cleared again before returning.
 * NOTE(review): MNTK_FRCUNMOUNT, set for MNT_FORCE, is not cleared on those
 * paths -- confirm whether that is intentional.
 */
int
dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
{
        vnode_t coveredvp = (vnode_t)0;
        int error;
        int needwakeup = 0;
        int forcedunmount = 0;
        int lflags = 0;
        struct vnode *devvp = NULLVP;

        if (flags & MNT_FORCE)
                forcedunmount = 1;
        mount_lock(mp);
        /* XXX post jaguar fix LK_DRAIN - then clean this up */
        if ((flags & MNT_FORCE)) {
                mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
                mp->mnt_lflag |= MNT_LFORCE;
        }
        if (mp->mnt_lflag & MNT_LUNMOUNT) {
                /*
                 * Someone else is already unmounting mp: ask to be woken,
                 * give up our ref (with the mount lock still held) and
                 * sleep.  PDROP means the mount lock is not re-taken when
                 * msleep returns.
                 */
                mp->mnt_lflag |= MNT_LWAIT;
                if(withref != 0)
                        mount_drop(mp, 1);
                msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
                /*
                 * The prior unmount attempt has probably succeeded.
                 * Do not dereference mp here - returning EBUSY is safest.
                 */
                return (EBUSY);
        }
        /* We are now the one and only unmounter of mp. */
        mp->mnt_kern_flag |= MNTK_UNMOUNT;
        mp->mnt_lflag |= MNT_LUNMOUNT;
        mp->mnt_flag &=~ MNT_ASYNC;
        /*
         * anyone currently in the fast path that
         * trips over the cached rootvp will be
         * dumped out and forced into the slow path
         * to regenerate a new cached value
         */
        mp->mnt_realrootvp = NULLVP;
        mount_unlock(mp);

        /*
         * taking the name_cache_lock exclusively will
         * ensure that everyone is out of the fast path who
         * might be trying to use a now stale copy of
         * vp->v_mountedhere->mnt_realrootvp
         * bumping mount_generation causes the cached values
         * to be invalidated
         */
        name_cache_lock();
        mount_generation++;
        name_cache_unlock();


        lck_rw_lock_exclusive(&mp->mnt_rwlock);
        /* The caller's ref is released once the rwlock is held exclusively. */
        if (withref != 0)
                mount_drop(mp, 0);
#if CONFIG_FSE
        fsevent_unmount(mp);  /* has to come first! */
#endif
        error = 0;
        if (forcedunmount == 0) {
                ubc_umount(mp); /* release cached vnodes */
                if ((mp->mnt_flag & MNT_RDONLY) == 0) {
                        error = VFS_SYNC(mp, MNT_WAIT, ctx);
                        if (error) {
                                /* 'out' expects the mount lock to be held. */
                                mount_lock(mp);
                                mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
                                mp->mnt_lflag &= ~MNT_LUNMOUNT;
                                mp->mnt_lflag &= ~MNT_LFORCE;
                                goto out;
                        }
                }
        }

        if (forcedunmount)
                lflags |= FORCECLOSE;
        error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
        /* A vflush failure only aborts a non-forced unmount. */
        if ((forcedunmount == 0) && error) {
                mount_lock(mp);
                mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
                mp->mnt_lflag &= ~MNT_LUNMOUNT;
                mp->mnt_lflag &= ~MNT_LFORCE;
                goto out;
        }

        /* make sure there are no one in the mount iterations or lookup */
        mount_iterdrain(mp);

        error = VFS_UNMOUNT(mp, flags, ctx);
        if (error) {
                /* Undo the iterdrain above and the unmount-in-progress flags. */
                mount_iterreset(mp);
                mount_lock(mp);
                mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
                mp->mnt_lflag &= ~MNT_LUNMOUNT;
                mp->mnt_lflag &= ~MNT_LFORCE;
                goto out;
        }

        /* increment the operations count */
        /* (error is always 0 here; the failure case jumped to 'out' above) */
        if (!error)
                OSAddAtomic(1, &vfs_nummntops);

        if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
                /* hold an io reference and drop the usecount before close */
                devvp = mp->mnt_devvp;
                vnode_getalways(devvp);
                vnode_rele(devvp);
                VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
                       ctx);
                vnode_clearmountedon(devvp);
                vnode_put(devvp);
        }
        /* The rwlock is released around mount_list_remove() and then re-taken. */
        lck_rw_done(&mp->mnt_rwlock);
        mount_list_remove(mp);
        lck_rw_lock_exclusive(&mp->mnt_rwlock);

        /* mark the mount point hook in the vp but not drop the ref yet */
        if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
                        vnode_getwithref(coveredvp);
                        vnode_lock_spin(coveredvp);
                        coveredvp->v_mountedhere = (struct mount *)0;
                        vnode_unlock(coveredvp);
                        vnode_put(coveredvp);
        }

        /* One fewer mount of this file system type. */
        mount_list_lock();
        mp->mnt_vtable->vfc_refcount--;
        mount_list_unlock();

        cache_purgevfs(mp);     /* remove cache entries for this file sys */
        vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
        mount_lock(mp);
        mp->mnt_lflag |= MNT_LDEAD;

        if (mp->mnt_lflag & MNT_LWAIT) {
                /*
                 * do the wakeup here
                 * in case we block in mount_refdrain
                 * which will drop the mount lock
                 * and allow anyone blocked in vfs_busy
                 * to wakeup and see the LDEAD state
                 */
                mp->mnt_lflag &= ~MNT_LWAIT;
                wakeup((caddr_t)mp);
        }
        mount_refdrain(mp);
out:
        /*
         * Reached with the mount lock and the exclusive rwlock held, on both
         * the success and the failure paths.  Waiters are woken only after
         * both locks have been released.
         */
        if (mp->mnt_lflag & MNT_LWAIT) {
                mp->mnt_lflag &= ~MNT_LWAIT;
                needwakeup = 1;
        }
        mount_unlock(mp);
        lck_rw_done(&mp->mnt_rwlock);

        if (needwakeup)
                wakeup((caddr_t)mp);
        if (!error) {
                if ((coveredvp != NULLVP)) {
                        vnode_t pvp;

                        /*
                         * Trade the usecount held on the covered vnode for
                         * an iocount, and pick up its parent so a NOTE_WRITE
                         * can be posted on it below.
                         */
                        vnode_getwithref(coveredvp);
                        pvp = vnode_getparent(coveredvp);
                        vnode_rele(coveredvp);
                        vnode_lock_spin(coveredvp);
                        if(mp->mnt_crossref == 0) {
                                /* No cross references left: free mp now. */
                                vnode_unlock(coveredvp);
                                mount_lock_destroy(mp);
#if CONFIG_MACF
                                mac_mount_label_destroy(mp);
#endif
                                FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
                        }  else {
                                /*
                                 * Cross references remain: mark the covered
                                 * vnode so mount_dropcrossref() frees mp
                                 * when the last one goes away.
                                 */
                                coveredvp->v_lflag |= VL_MOUNTDEAD;
                                vnode_unlock(coveredvp);
                        }
                        vnode_put(coveredvp);

                        if (pvp) {
                                lock_vnode_and_post(pvp, NOTE_WRITE);
                                vnode_put(pvp);
                        }
                } else if (mp->mnt_flag & MNT_ROOTFS) {
                                /* The root file system covers no vnode. */
                                mount_lock_destroy(mp);
#if CONFIG_MACF
                                mac_mount_label_destroy(mp);
#endif
                                FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
                } else
                        panic("dounmount: no coveredvp");
        }
        return (error);
}
1292
1293 void
1294 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
1295 {
1296                 vnode_lock(dp);
1297                 mp->mnt_crossref--;
1298                 if (mp->mnt_crossref < 0)
1299                         panic("mount cross refs -ve");
1300                 if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) {
1301                         dp->v_lflag &= ~VL_MOUNTDEAD;
1302                         if (need_put)
1303                                 vnode_put_locked(dp);
1304                         vnode_unlock(dp);
1305                         mount_lock_destroy(mp);
1306 #if CONFIG_MACF
1307                         mac_mount_label_destroy(mp);
1308 #endif
1309                         FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1310                         return;
1311                 }
1312                 if (need_put)
1313                         vnode_put_locked(dp);
1314                 vnode_unlock(dp);
1315 }
1316
1317
1318 /*
1319  * Sync each mounted filesystem.
1320  */
#if DIAGNOSTIC
/* When non-zero, sync() calls vfs_bufstats() after syncing (DIAGNOSTIC builds only). */
int syncprt = 0;
/* Exposes syncprt under the name "syncprt" through a ctldebug entry. */
struct ctldebug debug0 = { "syncprt", &syncprt };
#endif

/* When non-zero, sync() calls vm_countdirtypages() after syncing. */
int print_vmpage_stat=0;
1327
1328 static int
1329 sync_callback(mount_t mp, void * arg)
1330 {
1331         int asyncflag;
1332
1333         if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1334                         asyncflag = mp->mnt_flag & MNT_ASYNC;
1335                         mp->mnt_flag &= ~MNT_ASYNC;
1336                         VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current());
1337                         if (asyncflag)
1338                                 mp->mnt_flag |= MNT_ASYNC;
1339         }
1340         return(VFS_RETURNED);
1341 }
1342
1343
#include <kern/clock.h>

/*
 * Seconds value stored by clock_get_calendar_microtime() each time sync()
 * runs.  Its address is also the wait channel that sync() wakes up.
 */
clock_sec_t sync_wait_time = 0;
1347
1348 /* ARGSUSED */
1349 int
1350 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
1351 {
1352         clock_nsec_t nsecs;
1353
1354         vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
1355
1356         {
1357                 static fsid_t fsid = { { 0, 0 } };
1358
1359                 clock_get_calendar_microtime(&sync_wait_time, &nsecs);
1360                 vfs_event_signal(&fsid, VQ_SYNCEVENT, (intptr_t)NULL);
1361                 wakeup((caddr_t)&sync_wait_time);
1362         }
1363
1364         {
1365         if(print_vmpage_stat) {
1366                 vm_countdirtypages();
1367         }
1368         }
1369 #if DIAGNOSTIC
1370         if (syncprt)
1371                 vfs_bufstats();
1372 #endif /* DIAGNOSTIC */
1373         return (0);
1374 }
1375
1376 /*
1377  * Change filesystem quotas.
1378  */
1379 #if QUOTA
1380 static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval);
1381
1382 int
1383 quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval)
1384 {
1385         boolean_t funnel_state;
1386         int error;
1387
1388         funnel_state = thread_funnel_set(kernel_flock, TRUE);
1389         error = quotactl_funneled(p, uap, retval);
1390         thread_funnel_set(kernel_flock, funnel_state);
1391         return(error);
1392 }
1393
1394 static int
1395 quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1396 {
1397         struct mount *mp;
1398         int error, quota_cmd, quota_status;
1399         caddr_t datap;
1400         size_t fnamelen;
1401         struct nameidata nd;
1402         vfs_context_t ctx = vfs_context_current();
1403         struct dqblk my_dqblk;
1404
1405         AUDIT_ARG(uid, uap->uid);
1406         AUDIT_ARG(cmd, uap->cmd);
1407         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1408                 UIO_USERSPACE, uap->path, ctx);
1409         error = namei(&nd);
1410         if (error)
1411                 return (error);
1412         mp = nd.ni_vp->v_mount;
1413         vnode_put(nd.ni_vp);
1414         nameidone(&nd);
1415
1416         /* copyin any data we will need for downstream code */
1417         quota_cmd = uap->cmd >> SUBCMDSHIFT;
1418
1419         switch (quota_cmd) {
1420         case Q_QUOTAON:
1421                 /* uap->arg specifies a file from which to take the quotas */
1422                 fnamelen = MAXPATHLEN;
1423                 datap = kalloc(MAXPATHLEN);
1424                 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
1425                 break;
1426         case Q_GETQUOTA:
1427                 /* uap->arg is a pointer to a dqblk structure. */
1428                 datap = (caddr_t) &my_dqblk;
1429                 break;
1430         case Q_SETQUOTA:
1431         case Q_SETUSE:
1432                 /* uap->arg is a pointer to a dqblk structure. */
1433                 datap = (caddr_t) &my_dqblk;
1434                 if (proc_is64bit(p)) {
1435                         struct user_dqblk       my_dqblk64;
1436                         error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
1437                         if (error == 0) {
1438                                 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
1439                         }
1440                 }
1441                 else {
1442                         error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
1443                 }
1444                 break;
1445         case Q_QUOTASTAT:
1446                 /* uap->arg is a pointer to an integer */
1447                 datap = (caddr_t) &quota_status;
1448                 break;
1449         default:
1450                 datap = NULL;
1451                 break;
1452         } /* switch */
1453
1454         if (error == 0) {
1455                 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
1456         }
1457
1458         switch (quota_cmd) {
1459         case Q_QUOTAON:
1460                 if (datap != NULL)
1461                         kfree(datap, MAXPATHLEN);
1462                 break;
1463         case Q_GETQUOTA:
1464                 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
1465                 if (error == 0) {
1466                         if (proc_is64bit(p)) {
1467                                 struct user_dqblk       my_dqblk64;
1468                                 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
1469                                 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
1470                         }
1471                         else {
1472                                 error = copyout(datap, uap->arg, sizeof (struct dqblk));
1473                         }
1474                 }
1475                 break;
1476         case Q_QUOTASTAT:
1477                 /* uap->arg is a pointer to an integer */
1478                 if (error == 0) {
1479                         error = copyout(datap, uap->arg, sizeof(quota_status));
1480                 }
1481                 break;
1482         default:
1483                 break;
1484         } /* switch */
1485
1486         return (error);
1487 }
1488 #else
1489 int
1490 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
1491 {
1492         return (EOPNOTSUPP);
1493 }
1494 #endif /* QUOTA */
1495
1496 /*
1497  * Get filesystem statistics.
1498  *
1499  * Returns:     0                       Success
1500  *      namei:???
1501  *      vfs_update_vfsstat:???
1502  *      munge_statfs:EFAULT
1503  */
1504 /* ARGSUSED */
1505 int
1506 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1507 {
1508         struct mount *mp;
1509         struct vfsstatfs *sp;
1510         int error;
1511         struct nameidata nd;
1512         vfs_context_t ctx = vfs_context_current();
1513         vnode_t vp;
1514
1515         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1516                 UIO_USERSPACE, uap->path, ctx);
1517         error = namei(&nd);
1518         if (error)
1519                 return (error);
1520         vp = nd.ni_vp;
1521         mp = vp->v_mount;
1522         sp = &mp->mnt_vfsstat;
1523         nameidone(&nd);
1524
1525         error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
1526         vnode_put(vp);
1527         if (error != 0)
1528                 return (error);
1529
1530         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1531         return (error);
1532 }
1533
1534 /*
1535  * Get filesystem statistics.
1536  */
1537 /* ARGSUSED */
1538 int
1539 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1540 {
1541         vnode_t vp;
1542         struct mount *mp;
1543         struct vfsstatfs *sp;
1544         int error;
1545
1546         AUDIT_ARG(fd, uap->fd);
1547
1548         if ( (error = file_vnode(uap->fd, &vp)) )
1549                 return (error);
1550
1551         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1552
1553         mp = vp->v_mount;
1554         if (!mp) {
1555                 file_drop(uap->fd);
1556                 return (EBADF);
1557         }
1558         sp = &mp->mnt_vfsstat;
1559         if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
1560                 file_drop(uap->fd);
1561                 return (error);
1562         }
1563         file_drop(uap->fd);
1564
1565         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1566
1567         return (error);
1568 }
1569
1570 /*
1571  * Common routine to handle copying of statfs64 data to user space
1572  */
1573 static int
1574 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
1575 {
1576         int error;
1577         struct statfs64 sfs;
1578
1579         bzero(&sfs, sizeof(sfs));
1580
1581         sfs.f_bsize = sfsp->f_bsize;
1582         sfs.f_iosize = (int32_t)sfsp->f_iosize;
1583         sfs.f_blocks = sfsp->f_blocks;
1584         sfs.f_bfree = sfsp->f_bfree;
1585         sfs.f_bavail = sfsp->f_bavail;
1586         sfs.f_files = sfsp->f_files;
1587         sfs.f_ffree = sfsp->f_ffree;
1588         sfs.f_fsid = sfsp->f_fsid;
1589         sfs.f_owner = sfsp->f_owner;
1590         sfs.f_type = mp->mnt_vtable->vfc_typenum;
1591         sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1592         sfs.f_fssubtype = sfsp->f_fssubtype;
1593         strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
1594         strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
1595         strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
1596
1597         error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
1598
1599         return(error);
1600 }
1601
1602 /*
1603  * Get file system statistics in 64-bit mode
1604  */
1605 int
1606 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
1607 {
1608         struct mount *mp;
1609         struct vfsstatfs *sp;
1610         int error;
1611         struct nameidata nd;
1612         vfs_context_t ctxp = vfs_context_current();
1613         vnode_t vp;
1614
1615         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1616                 UIO_USERSPACE, uap->path, ctxp);
1617         error = namei(&nd);
1618         if (error)
1619                 return (error);
1620         vp = nd.ni_vp;
1621         mp = vp->v_mount;
1622         sp = &mp->mnt_vfsstat;
1623         nameidone(&nd);
1624
1625         error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
1626         vnode_put(vp);
1627         if (error != 0)
1628                 return (error);
1629
1630         error = statfs64_common(mp, sp, uap->buf);
1631
1632         return (error);
1633 }
1634
1635 /*
1636  * Get file system statistics in 64-bit mode
1637  */
1638 int
1639 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
1640 {
1641         struct vnode *vp;
1642         struct mount *mp;
1643         struct vfsstatfs *sp;
1644         int error;
1645
1646         AUDIT_ARG(fd, uap->fd);
1647
1648         if ( (error = file_vnode(uap->fd, &vp)) )
1649                 return (error);
1650
1651         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1652
1653         mp = vp->v_mount;
1654         if (!mp) {
1655                 file_drop(uap->fd);
1656                 return (EBADF);
1657         }
1658         sp = &mp->mnt_vfsstat;
1659         if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
1660                 file_drop(uap->fd);
1661                 return (error);
1662         }
1663         file_drop(uap->fd);
1664
1665         error = statfs64_common(mp, sp, uap->buf);
1666
1667         return (error);
1668 }
1669
1670 struct getfsstat_struct {
1671         user_addr_t     sfsp;
1672         user_addr_t     *mp;
1673         int             count;
1674         int             maxcount;
1675         int             flags;
1676         int             error;
1677 };
1678
1679
1680 static int
1681 getfsstat_callback(mount_t mp, void * arg)
1682 {
1683
1684         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1685         struct vfsstatfs *sp;
1686         int error, my_size;
1687         vfs_context_t ctx = vfs_context_current();
1688
1689         if (fstp->sfsp && fstp->count < fstp->maxcount) {
1690                 sp = &mp->mnt_vfsstat;
1691                 /*
1692                  * If MNT_NOWAIT is specified, do not refresh the
1693                  * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
1694                  */
1695                 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
1696                         (error = vfs_update_vfsstat(mp, ctx,
1697                             VFS_USER_EVENT))) {
1698                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1699                         return(VFS_RETURNED);
1700                 }
1701
1702                 /*
1703                  * Need to handle LP64 version of struct statfs
1704                  */
1705                 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
1706                 if (error) {
1707                         fstp->error = error;
1708                         return(VFS_RETURNED_DONE);
1709                 }
1710                 fstp->sfsp += my_size;
1711
1712                 if (fstp->mp) {
1713                         error = mac_mount_label_get(mp, *fstp->mp);
1714                         if (error) {
1715                                 fstp->error = error;
1716                                 return(VFS_RETURNED_DONE);
1717                         }
1718                         fstp->mp++;
1719                 }
1720         }
1721         fstp->count++;
1722         return(VFS_RETURNED);
1723 }
1724
1725 /*
1726  * Get statistics on all filesystems.
1727  */
1728 int
1729 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
1730 {
1731         struct __mac_getfsstat_args muap;
1732
1733         muap.buf = uap->buf;
1734         muap.bufsize = uap->bufsize;
1735         muap.mac = USER_ADDR_NULL;
1736         muap.macsize = 0;
1737         muap.flags = uap->flags;
1738
1739         return (__mac_getfsstat(p, &muap, retval));
1740 }
1741
1742 /*
1743  * __mac_getfsstat: Get MAC-related file system statistics
1744  *
1745  * Parameters:    p                        (ignored)
1746  *                uap                      User argument descriptor (see below)
1747  *                retval                   Count of file system statistics (N stats)
1748  *
1749  * Indirect:      uap->bufsize             Buffer size
1750  *                uap->macsize             MAC info size
1751  *                uap->buf                 Buffer where information will be returned
1752  *                uap->mac                 MAC info
1753  *                uap->flags               File system flags
1754  *
1755  *
1756  * Returns:        0                       Success
1757  *                !0                       Not success
1758  *
1759  */
1760 int
1761 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
1762 {
1763         user_addr_t sfsp;
1764         user_addr_t *mp;
1765         size_t count, maxcount, bufsize, macsize;
1766         struct getfsstat_struct fst;
1767
1768         bufsize = (size_t) uap->bufsize;
1769         macsize = (size_t) uap->macsize;
1770
1771         if (IS_64BIT_PROCESS(p)) {
1772                 maxcount = bufsize / sizeof(struct user64_statfs);
1773         }
1774         else {
1775                 maxcount = bufsize / sizeof(struct user32_statfs);
1776         }
1777         sfsp = uap->buf;
1778         count = 0;
1779
1780         mp = NULL;
1781
1782 #if CONFIG_MACF
1783         if (uap->mac != USER_ADDR_NULL) {
1784                 u_int32_t *mp0;
1785                 int error;
1786                 unsigned int i;
1787
1788                 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
1789                 if (count != maxcount)
1790                         return (EINVAL);
1791
1792                 /* Copy in the array */
1793                 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
1794                 if (mp0 == NULL) {
1795                         return (ENOMEM);
1796                 }
1797
1798                 error = copyin(uap->mac, mp0, macsize);
1799                 if (error) {
1800                         FREE(mp0, M_MACTEMP);
1801                         return (error);
1802                 }
1803
1804                 /* Normalize to an array of user_addr_t */
1805                 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
1806                 if (mp == NULL) {
1807                         FREE(mp0, M_MACTEMP);
1808                         return (ENOMEM);
1809                 }
1810
1811                 for (i = 0; i < count; i++) {
1812                         if (IS_64BIT_PROCESS(p))
1813                                 mp[i] = ((user_addr_t *)mp0)[i];
1814                         else
1815                                 mp[i] = (user_addr_t)mp0[i];
1816                 }
1817                 FREE(mp0, M_MACTEMP);
1818         }
1819 #endif
1820
1821
1822         fst.sfsp = sfsp;
1823         fst.mp = mp;
1824         fst.flags = uap->flags;
1825         fst.count = 0;
1826         fst.error = 0;
1827         fst.maxcount = maxcount;
1828
1829
1830         vfs_iterate(0, getfsstat_callback, &fst);
1831
1832         if (mp)
1833                 FREE(mp, M_MACTEMP);
1834
1835         if (fst.error ) {
1836                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1837                 return(fst.error);
1838         }
1839
1840         if (fst.sfsp && fst.count > fst.maxcount)
1841                 *retval = fst.maxcount;
1842         else
1843                 *retval = fst.count;
1844         return (0);
1845 }
1846
1847 static int
1848 getfsstat64_callback(mount_t mp, void * arg)
1849 {
1850         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1851         struct vfsstatfs *sp;
1852         int error;
1853
1854         if (fstp->sfsp && fstp->count < fstp->maxcount) {
1855                 sp = &mp->mnt_vfsstat;
1856                 /*
1857                  * If MNT_NOWAIT is specified, do not refresh the fsstat
1858                  * cache. MNT_WAIT overrides MNT_NOWAIT.
1859                  *
1860                  * We treat MNT_DWAIT as MNT_WAIT for all instances of
1861                  * getfsstat, since the constants are out of the same
1862                  * namespace.
1863                  */
1864                 if (((fstp->flags & MNT_NOWAIT) == 0 ||
1865                      (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
1866                     (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
1867                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1868                         return(VFS_RETURNED);
1869                 }
1870
1871                 error = statfs64_common(mp, sp, fstp->sfsp);
1872                 if (error) {
1873                         fstp->error = error;
1874                         return(VFS_RETURNED_DONE);
1875                 }
1876                 fstp->sfsp += sizeof(struct statfs64);
1877         }
1878         fstp->count++;
1879         return(VFS_RETURNED);
1880 }
1881
1882 /*
1883  * Get statistics on all file systems in 64 bit mode.
1884  */
1885 int
1886 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
1887 {
1888         user_addr_t sfsp;
1889         int count, maxcount;
1890         struct getfsstat_struct fst;
1891
1892         maxcount = uap->bufsize / sizeof(struct statfs64);
1893
1894         sfsp = uap->buf;
1895         count = 0;
1896
1897         fst.sfsp = sfsp;
1898         fst.flags = uap->flags;
1899         fst.count = 0;
1900         fst.error = 0;
1901         fst.maxcount = maxcount;
1902
1903         vfs_iterate(0, getfsstat64_callback, &fst);
1904
1905         if (fst.error ) {
1906                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1907                 return(fst.error);
1908         }
1909
1910         if (fst.sfsp && fst.count > fst.maxcount)
1911                 *retval = fst.maxcount;
1912         else
1913                 *retval = fst.count;
1914
1915         return (0);
1916 }
1917
1918 /*
1919  * Change current working directory to a given file descriptor.
1920  */
1921 /* ARGSUSED */
1922 static int
1923 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1924 {
1925         struct filedesc *fdp = p->p_fd;
1926         vnode_t vp;
1927         vnode_t tdp;
1928         vnode_t tvp;
1929         struct mount *mp;
1930         int error;
1931         vfs_context_t ctx = vfs_context_current();
1932
1933         AUDIT_ARG(fd, uap->fd);
1934         if (per_thread && uap->fd == -1) {
1935                 /*
1936                  * Switching back from per-thread to per process CWD; verify we
1937                  * in fact have one before proceeding.  The only success case
1938                  * for this code path is to return 0 preemptively after zapping
1939                  * the thread structure contents.
1940                  */
1941                 thread_t th = vfs_context_thread(ctx);
1942                 if (th) {
1943                         uthread_t uth = get_bsdthread_info(th);
1944                         tvp = uth->uu_cdir;
1945                         uth->uu_cdir = NULLVP;
1946                         if (tvp != NULLVP) {
1947                                 vnode_rele(tvp);
1948                                 return (0);
1949                         }
1950                 }
1951                 return (EBADF);
1952         }
1953
1954         if ( (error = file_vnode(uap->fd, &vp)) )
1955                 return(error);
1956         if ( (error = vnode_getwithref(vp)) ) {
1957                 file_drop(uap->fd);
1958                 return(error);
1959         }
1960
1961         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1962
1963         if (vp->v_type != VDIR) {
1964                 error = ENOTDIR;
1965                 goto out;
1966         }
1967
1968 #if CONFIG_MACF
1969         error = mac_vnode_check_chdir(ctx, vp);
1970         if (error)
1971                 goto out;
1972 #endif
1973         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
1974         if (error)
1975                 goto out;
1976
1977         while (!error && (mp = vp->v_mountedhere) != NULL) {
1978                 if (vfs_busy(mp, LK_NOWAIT)) {
1979                         error = EACCES;
1980                         goto out;
1981                 }
1982                 error = VFS_ROOT(mp, &tdp, ctx);
1983                 vfs_unbusy(mp);
1984                 if (error)
1985                         break;
1986                 vnode_put(vp);
1987                 vp = tdp;
1988         }
1989         if (error)
1990                 goto out;
1991         if ( (error = vnode_ref(vp)) )
1992                 goto out;
1993         vnode_put(vp);
1994
1995         if (per_thread) {
1996                 thread_t th = vfs_context_thread(ctx);
1997                 if (th) {
1998                         uthread_t uth = get_bsdthread_info(th);
1999                         tvp = uth->uu_cdir;
2000                         uth->uu_cdir = vp;
2001                         OSBitOrAtomic(P_THCWD, &p->p_flag);
2002                 } else {
2003                         vnode_rele(vp);
2004                         return (ENOENT);
2005                 }
2006         } else {
2007                 proc_fdlock(p);
2008                 tvp = fdp->fd_cdir;
2009                 fdp->fd_cdir = vp;
2010                 proc_fdunlock(p);
2011         }
2012
2013         if (tvp)
2014                 vnode_rele(tvp);
2015         file_drop(uap->fd);
2016
2017         return (0);
2018 out:
2019         vnode_put(vp);
2020         file_drop(uap->fd);
2021
2022         return(error);
2023 }
2024
2025 int
2026 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2027 {
2028         return common_fchdir(p, uap, 0);
2029 }
2030
2031 int
2032 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2033 {
2034         return common_fchdir(p, (void *)uap, 1);
2035 }
2036
2037 /*
2038  * Change current working directory (".").
2039  *
2040  * Returns:     0                       Success
2041  *      change_dir:ENOTDIR
2042  *      change_dir:???
2043  *      vnode_ref:ENOENT                No such file or directory
2044  */
2045 /* ARGSUSED */
2046 static int
2047 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
2048 {
2049         struct filedesc *fdp = p->p_fd;
2050         int error;
2051         struct nameidata nd;
2052         vnode_t tvp;
2053         vfs_context_t ctx = vfs_context_current();
2054
2055         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2056                 UIO_USERSPACE, uap->path, ctx);
2057         error = change_dir(&nd, ctx);
2058         if (error)
2059                 return (error);
2060         if ( (error = vnode_ref(nd.ni_vp)) ) {
2061                 vnode_put(nd.ni_vp);
2062                 return (error);
2063         }
2064         /*
2065          * drop the iocount we picked up in change_dir
2066          */
2067         vnode_put(nd.ni_vp);
2068
2069         if (per_thread) {
2070                 thread_t th = vfs_context_thread(ctx);
2071                 if (th) {
2072                         uthread_t uth = get_bsdthread_info(th);
2073                         tvp = uth->uu_cdir;
2074                         uth->uu_cdir = nd.ni_vp;
2075                         OSBitOrAtomic(P_THCWD, &p->p_flag);
2076                 } else {
2077                         vnode_rele(nd.ni_vp);
2078                         return (ENOENT);
2079                 }
2080         } else {
2081                 proc_fdlock(p);
2082                 tvp = fdp->fd_cdir;
2083                 fdp->fd_cdir = nd.ni_vp;
2084                 proc_fdunlock(p);
2085         }
2086
2087         if (tvp)
2088                 vnode_rele(tvp);
2089
2090         return (0);
2091 }
2092
2093
2094 /*
2095  * chdir
2096  *
2097  * Change current working directory (".") for the entire process
2098  *
2099  * Parameters:  p       Process requesting the call
2100  *              uap     User argument descriptor (see below)
2101  *              retval  (ignored)
2102  *
2103  * Indirect parameters: uap->path       Directory path
2104  *
2105  * Returns:     0                       Success
2106  *              common_chdir: ENOTDIR
2107  *              common_chdir: ENOENT    No such file or directory
2108  *              common_chdir: ???
2109  *
2110  */
2111 int
2112 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2113 {
2114         return common_chdir(p, (void *)uap, 0);
2115 }
2116
2117 /*
2118  * __pthread_chdir
2119  *
2120  * Change current working directory (".") for a single thread
2121  *
2122  * Parameters:  p       Process requesting the call
2123  *              uap     User argument descriptor (see below)
2124  *              retval  (ignored)
2125  *
2126  * Indirect parameters: uap->path       Directory path
2127  *
2128  * Returns:     0                       Success
2129  *              common_chdir: ENOTDIR
2130  *              common_chdir: ENOENT    No such file or directory
2131  *              common_chdir: ???
2132  *
2133  */
2134 int
2135 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2136 {
2137         return common_chdir(p, (void *)uap, 1);
2138 }
2139
2140
2141 /*
2142  * Change notion of root (``/'') directory.
2143  */
2144 /* ARGSUSED */
2145 int
2146 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
2147 {
2148         struct filedesc *fdp = p->p_fd;
2149         int error;
2150         struct nameidata nd;
2151         vnode_t tvp;
2152         vfs_context_t ctx = vfs_context_current();
2153
2154         if ((error = suser(kauth_cred_get(), &p->p_acflag)))
2155                 return (error);
2156
2157         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2158                 UIO_USERSPACE, uap->path, ctx);
2159         error = change_dir(&nd, ctx);
2160         if (error)
2161                 return (error);
2162
2163 #if CONFIG_MACF
2164         error = mac_vnode_check_chroot(ctx, nd.ni_vp,
2165             &nd.ni_cnd);
2166         if (error) {
2167                 vnode_put(nd.ni_vp);
2168                 return (error);
2169         }
2170 #endif
2171
2172         if ( (error = vnode_ref(nd.ni_vp)) ) {
2173                 vnode_put(nd.ni_vp);
2174                 return (error);
2175         }
2176         vnode_put(nd.ni_vp);
2177
2178         proc_fdlock(p);
2179         tvp = fdp->fd_rdir;
2180         fdp->fd_rdir = nd.ni_vp;
2181         fdp->fd_flags |= FD_CHROOT;
2182         proc_fdunlock(p);
2183
2184         if (tvp != NULL)
2185                 vnode_rele(tvp);
2186
2187         return (0);
2188 }
2189
2190 /*
2191  * Common routine for chroot and chdir.
2192  *
2193  * Returns:     0                       Success
2194  *              ENOTDIR                 Not a directory
2195  *              namei:???               [anything namei can return]
2196  *              vnode_authorize:???     [anything vnode_authorize can return]
2197  */
2198 static int
2199 change_dir(struct nameidata *ndp, vfs_context_t ctx)
2200 {
2201         vnode_t vp;
2202         int error;
2203
2204         if ((error = namei(ndp)))
2205                 return (error);
2206         nameidone(ndp);
2207         vp = ndp->ni_vp;
2208
2209         if (vp->v_type != VDIR) {
2210                 vnode_put(vp);
2211                 return (ENOTDIR);
2212         }
2213
2214 #if CONFIG_MACF
2215         error = mac_vnode_check_chdir(ctx, vp);
2216         if (error) {
2217                 vnode_put(vp);
2218                 return (error);
2219         }
2220 #endif
2221
2222         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2223         if (error) {
2224                 vnode_put(vp);
2225                 return (error);
2226         }
2227
2228         return (error);
2229 }
2230
2231 /*
2232  * Check permissions, allocate an open file structure,
2233  * and call the device open routine if any.
2234  *
2235  * Returns:     0                       Success
2236  *              EINVAL
2237  *              EINTR
2238  *      falloc:ENFILE
2239  *      falloc:EMFILE
2240  *      falloc:ENOMEM
2241  *      vn_open_auth:???
2242  *      dupfdopen:???
2243  *      VNOP_ADVLOCK:???
2244  *      vnode_setsize:???
2245  *
2246  * XXX Need to implement uid, gid
2247  */
2248 int
2249 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, int32_t *retval)
2250 {
2251         proc_t p = vfs_context_proc(ctx);
2252         uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2253         struct filedesc *fdp = p->p_fd;
2254         struct fileproc *fp;
2255         vnode_t vp;
2256         int flags, oflags;
2257         struct fileproc *nfp;
2258         int type, indx, error;
2259         struct flock lf;
2260         int no_controlling_tty = 0;
2261         int deny_controlling_tty = 0;
2262         struct session *sessp = SESSION_NULL;
2263         struct vfs_context context = *vfs_context_current();    /* local copy */
2264
2265         oflags = uflags;
2266
2267         if ((oflags & O_ACCMODE) == O_ACCMODE)
2268                 return(EINVAL);
2269         flags = FFLAGS(uflags);
2270
2271         AUDIT_ARG(fflags, oflags);
2272         AUDIT_ARG(mode, vap->va_mode);
2273
2274         if ( (error = falloc(p, &nfp, &indx, ctx)) ) {
2275                 return (error);
2276         }
2277         fp = nfp;
2278         uu->uu_dupfd = -indx - 1;
2279
2280         if (!(p->p_flag & P_CONTROLT)) {
2281                 sessp = proc_session(p);
2282                 no_controlling_tty = 1;
2283                 /*
2284                  * If conditions would warrant getting a controlling tty if
2285                  * the device being opened is a tty (see ttyopen in tty.c),
2286                  * but the open flags deny it, set a flag in the session to
2287                  * prevent it.
2288                  */
2289                 if (SESS_LEADER(p, sessp) &&
2290                     sessp->s_ttyvp == NULL &&
2291                     (flags & O_NOCTTY)) {
2292                         session_lock(sessp);
2293                         sessp->s_flags |= S_NOCTTY;
2294                         session_unlock(sessp);
2295                         deny_controlling_tty = 1;
2296                 }
2297         }
2298
2299         if ((error = vn_open_auth(ndp, &flags, vap))) {
2300                 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){        /* XXX from fdopen */
2301                         if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) {
2302                                 fp_drop(p, indx, NULL, 0);
2303                                 *retval = indx;
2304                                 if (deny_controlling_tty) {
2305                                         session_lock(sessp);
2306                                         sessp->s_flags &= ~S_NOCTTY;
2307                                         session_unlock(sessp);
2308                                 }
2309                                 if (sessp != SESSION_NULL)
2310                                         session_rele(sessp);
2311                                 return (0);
2312                         }
2313                 }
2314                 if (error == ERESTART)
2315                         error = EINTR;
2316                 fp_free(p, indx, fp);
2317
2318                 if (deny_controlling_tty) {
2319                         session_lock(sessp);
2320                         sessp->s_flags &= ~S_NOCTTY;
2321                         session_unlock(sessp);
2322                 }
2323                 if (sessp != SESSION_NULL)
2324                         session_rele(sessp);
2325                 return (error);
2326         }
2327         uu->uu_dupfd = 0;
2328         vp = ndp->ni_vp;
2329
2330         fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
2331         fp->f_fglob->fg_type = DTYPE_VNODE;
2332         fp->f_fglob->fg_ops = &vnops;
2333         fp->f_fglob->fg_data = (caddr_t)vp;
2334
2335         if (flags & (O_EXLOCK | O_SHLOCK)) {
2336                 lf.l_whence = SEEK_SET;
2337                 lf.l_start = 0;
2338                 lf.l_len = 0;
2339                 if (flags & O_EXLOCK)
2340                         lf.l_type = F_WRLCK;
2341                 else
2342                         lf.l_type = F_RDLCK;
2343                 type = F_FLOCK;
2344                 if ((flags & FNONBLOCK) == 0)
2345                         type |= F_WAIT;
2346 #if CONFIG_MACF
2347                 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
2348                     F_SETLK, &lf);
2349                 if (error)
2350                         goto bad;
2351 #endif
2352                 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx)))
2353                         goto bad;
2354                 fp->f_fglob->fg_flag |= FHASLOCK;
2355         }
2356
2357         /* try to truncate by setting the size attribute */
2358         if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
2359                 goto bad;
2360
2361         /*
2362          * If the open flags denied the acquisition of a controlling tty,
2363          * clear the flag in the session structure that prevented the lower
2364          * level code from assigning one.
2365          */
2366         if (deny_controlling_tty) {
2367                 session_lock(sessp);
2368                 sessp->s_flags &= ~S_NOCTTY;
2369                 session_unlock(sessp);
2370         }
2371
2372         /*
2373          * If a controlling tty was set by the tty line discipline, then we
2374          * want to set the vp of the tty into the session structure.  We have
2375          * a race here because we can't get to the vp for the tp in ttyopen,
2376          * because it's not passed as a parameter in the open path.
2377          */
2378         if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
2379                 vnode_t ttyvp;
2380                 vnode_ref(vp);
2381                 session_lock(sessp);
2382                 ttyvp = sessp->s_ttyvp;
2383                 sessp->s_ttyvp = vp;
2384                 sessp->s_ttyvid = vnode_vid(vp);
2385                 session_unlock(sessp);
2386                 if (ttyvp != NULLVP)
2387                         vnode_rele(ttyvp);
2388         }
2389
2390         vnode_put(vp);
2391
2392         proc_fdlock(p);
2393         procfdtbl_releasefd(p, indx, NULL);
2394         fp_drop(p, indx, fp, 1);
2395         proc_fdunlock(p);
2396
2397         *retval = indx;
2398
2399         if (sessp != SESSION_NULL)
2400                 session_rele(sessp);
2401         return (0);
2402 bad:
2403         if (deny_controlling_tty) {
2404                 session_lock(sessp);
2405                 sessp->s_flags &= ~S_NOCTTY;
2406                 session_unlock(sessp);
2407         }
2408         if (sessp != SESSION_NULL)
2409                 session_rele(sessp);
2410
2411         /* Modify local copy (to not damage thread copy) */
2412         context.vc_ucred = fp->f_fglob->fg_cred;
2413
2414         vn_close(vp, fp->f_fglob->fg_flag, &context);
2415         vnode_put(vp);
2416         fp_free(p, indx, fp);
2417
2418         return (error);
2419
2420 }
2421
2422 /*
2423  * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
2424  *
2425  * Parameters:  p                       Process requesting the open
2426  *              uap                     User argument descriptor (see below)
2427  *              retval                  Pointer to an area to receive the
2428  *                                      return calue from the system call
2429  *
2430  * Indirect:    uap->path               Path to open (same as 'open')
2431  *              uap->flags              Flags to open (same as 'open'
2432  *              uap->uid                UID to set, if creating
2433  *              uap->gid                GID to set, if creating
2434  *              uap->mode               File mode, if creating (same as 'open')
2435  *              uap->xsecurity          ACL to set, if creating
2436  *
2437  * Returns:     0                       Success
2438  *              !0                      errno value
2439  *
2440  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
2441  *
2442  * XXX:         We should enummerate the possible errno values here, and where
2443  *              in the code they originated.
2444  */
2445 int
2446 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
2447 {
2448         struct filedesc *fdp = p->p_fd;
2449         int ciferror;
2450         kauth_filesec_t xsecdst;
2451         struct vnode_attr va;
2452         struct nameidata nd;
2453         int cmode;
2454
2455         AUDIT_ARG(owner, uap->uid, uap->gid);
2456
2457         xsecdst = NULL;
2458         if ((uap->xsecurity != USER_ADDR_NULL) &&
2459             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
2460                 return ciferror;
2461
2462         VATTR_INIT(&va);
2463         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2464         VATTR_SET(&va, va_mode, cmode);
2465         if (uap->uid != KAUTH_UID_NONE)
2466                 VATTR_SET(&va, va_uid, uap->uid);
2467         if (uap->gid != KAUTH_GID_NONE)
2468                 VATTR_SET(&va, va_gid, uap->gid);
2469         if (xsecdst != NULL)
2470                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2471
2472         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2473
2474         ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
2475         if (xsecdst != NULL)
2476                 kauth_filesec_free(xsecdst);
2477
2478         return ciferror;
2479 }
2480
2481 int
2482 open(proc_t p, struct open_args *uap, int32_t *retval)
2483 {
2484         __pthread_testcancel(1);
2485         return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
2486 }
2487
2488 int
2489 open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
2490 {
2491         struct filedesc *fdp = p->p_fd;
2492         struct vnode_attr va;
2493         struct nameidata nd;
2494         int cmode;
2495
2496         VATTR_INIT(&va);
2497         /* Mask off all but regular access permissions */
2498         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2499         VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2500
2501         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2502
2503         return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
2504 }
2505
2506
2507 /*
2508  * Create a special file.
2509  */
2510 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
2511
2512 int
2513 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
2514 {
2515         struct vnode_attr va;
2516         vfs_context_t ctx = vfs_context_current();
2517         int error;
2518         int whiteout = 0;
2519         struct nameidata nd;
2520         vnode_t vp, dvp;
2521
2522         VATTR_INIT(&va);
2523         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2524         VATTR_SET(&va, va_rdev, uap->dev);
2525
2526         /* If it's a mknod() of a FIFO, call mkfifo1() instead */
2527         if ((uap->mode & S_IFMT) == S_IFIFO)
2528                 return(mkfifo1(ctx, uap->path, &va));
2529
2530         AUDIT_ARG(mode, uap->mode);
2531         AUDIT_ARG(value32, uap->dev);
2532
2533         if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
2534                 return (error);
2535         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2536                 UIO_USERSPACE, uap->path, ctx);
2537         error = namei(&nd);
2538         if (error)
2539                 return (error);
2540         dvp = nd.ni_dvp;
2541         vp = nd.ni_vp;
2542
2543         if (vp != NULL) {
2544                 error = EEXIST;
2545                 goto out;
2546         }
2547
2548         switch (uap->mode & S_IFMT) {
2549         case S_IFMT:    /* used by badsect to flag bad sectors */
2550                 VATTR_SET(&va, va_type, VBAD);
2551                 break;
2552         case S_IFCHR:
2553                 VATTR_SET(&va, va_type, VCHR);
2554                 break;
2555         case S_IFBLK:
2556                 VATTR_SET(&va, va_type, VBLK);
2557                 break;
2558         case S_IFWHT:
2559                 whiteout = 1;
2560                 break;
2561         default:
2562                 error = EINVAL;
2563                 goto out;
2564         }
2565
2566 #if CONFIG_MACF
2567         if (!whiteout) {
2568                 error = mac_vnode_check_create(ctx,
2569                     nd.ni_dvp, &nd.ni_cnd, &va);
2570                 if (error)
2571                         goto out;
2572         }
2573 #endif
2574
2575         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2576                 goto out;
2577
2578         if (whiteout) {
2579                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, ctx);
2580         } else {
2581                 error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
2582         }
2583         if (error)
2584                 goto out;
2585
2586         if (vp) {
2587                 int     update_flags = 0;
2588
2589                 // Make sure the name & parent pointers are hooked up
2590                 if (vp->v_name == NULL)
2591                         update_flags |= VNODE_UPDATE_NAME;
2592                 if (vp->v_parent == NULLVP)
2593                         update_flags |= VNODE_UPDATE_PARENT;
2594
2595                 if (update_flags)
2596                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
2597
2598 #if CONFIG_FSE
2599                 add_fsevent(FSE_CREATE_FILE, ctx,
2600                     FSE_ARG_VNODE, vp,
2601                     FSE_ARG_DONE);
2602 #endif
2603         }
2604
2605 out:
2606         /*
2607          * nameidone has to happen before we vnode_put(dvp)
2608          * since it may need to release the fs_nodelock on the dvp
2609          */
2610         nameidone(&nd);
2611
2612         if (vp)
2613                 vnode_put(vp);
2614         vnode_put(dvp);
2615
2616         return (error);
2617 }
2618
2619 /*
2620  * Create a named pipe.
2621  *
2622  * Returns:     0                       Success
2623  *              EEXIST
2624  *      namei:???
2625  *      vnode_authorize:???
2626  *      vn_create:???
2627  */
2628 static int
2629 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
2630 {
2631         vnode_t vp, dvp;
2632         int error;
2633         struct nameidata nd;
2634
2635         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2636                 UIO_USERSPACE, upath, ctx);
2637         error = namei(&nd);
2638         if (error)
2639                 return (error);
2640         dvp = nd.ni_dvp;
2641         vp = nd.ni_vp;
2642
2643         /* check that this is a new file and authorize addition */
2644         if (vp != NULL) {
2645                 error = EEXIST;
2646                 goto out;
2647         }
2648         VATTR_SET(vap, va_type, VFIFO);
2649
2650 #if CONFIG_MACF
2651         error = mac_vnode_check_create(ctx, nd.ni_dvp,
2652             &nd.ni_cnd, vap);
2653         if (error)
2654                 goto out;
2655 #endif
2656
2657
2658         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2659                 goto out;
2660
2661
2662         error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx);
2663 out:
2664         /*
2665          * nameidone has to happen before we vnode_put(dvp)
2666          * since it may need to release the fs_nodelock on the dvp
2667          */
2668         nameidone(&nd);
2669
2670         if (vp)
2671                 vnode_put(vp);
2672         vnode_put(dvp);
2673
2674         return error;
2675 }
2676
2677
2678 /*
2679  * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
2680  *
2681  * Parameters:  p                       Process requesting the open
2682  *              uap                     User argument descriptor (see below)
2683  *              retval                  (Ignored)
2684  *
2685  * Indirect:    uap->path               Path to fifo (same as 'mkfifo')
2686  *              uap->uid                UID to set
2687  *              uap->gid                GID to set
2688  *              uap->mode               File mode to set (same as 'mkfifo')
2689  *              uap->xsecurity          ACL to set, if creating
2690  *
2691  * Returns:     0                       Success
2692  *              !0                      errno value
2693  *
2694  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
2695  *
2696  * XXX:         We should enummerate the possible errno values here, and where
2697  *              in the code they originated.
2698  */
2699 int
2700 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
2701 {
2702         int ciferror;
2703         kauth_filesec_t xsecdst;
2704         struct vnode_attr va;
2705
2706         AUDIT_ARG(owner, uap->uid, uap->gid);
2707
2708         xsecdst = KAUTH_FILESEC_NONE;
2709         if (uap->xsecurity != USER_ADDR_NULL) {
2710                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
2711                         return ciferror;
2712         }
2713
2714         VATTR_INIT(&va);
2715         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2716         if (uap->uid != KAUTH_UID_NONE)
2717                 VATTR_SET(&va, va_uid, uap->uid);
2718         if (uap->gid != KAUTH_GID_NONE)
2719                 VATTR_SET(&va, va_gid, uap->gid);
2720         if (xsecdst != KAUTH_FILESEC_NONE)
2721                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2722
2723         ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
2724
2725         if (xsecdst != KAUTH_FILESEC_NONE)
2726                 kauth_filesec_free(xsecdst);
2727         return ciferror;
2728 }
2729
2730 /* ARGSUSED */
2731 int
2732 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
2733 {
2734         struct vnode_attr va;
2735
2736         VATTR_INIT(&va);
2737         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2738
2739         return(mkfifo1(vfs_context_current(), uap->path, &va));
2740 }
2741
2742
/*
 * my_strrchr: find the last occurrence of 'ch' in the NUL-terminated
 * string 'p'.
 *
 * As with the standard strrchr(), 'ch' is converted to char before it is
 * compared, so values in the range 128..255 match the corresponding byte
 * even when plain char is signed.  The terminating NUL is part of the
 * string, so searching for 0 returns a pointer to the terminator.
 *
 * Returns:	pointer to the last matching character, or NULL if 'ch'
 *		does not occur in the string
 */
static char *
my_strrchr(char *p, int ch)
{
	const char target = (char)ch;
	char *last = NULL;

	/* the terminator itself is examined before the loop stops */
	do {
		if (*p == target)
			last = p;
	} while (*p++ != '\0');

	return last;
}
2756
2757 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
2758
2759 int
2760 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
2761 {
2762         int ret, len = _len;
2763
2764         *truncated_path = 0;
2765         ret = vn_getpath(dvp, path, &len);
2766         if (ret == 0 && len < (MAXPATHLEN - 1)) {
2767                 if (leafname) {
2768                         path[len-1] = '/';
2769                         len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
2770                         if (len > MAXPATHLEN) {
2771                                 char *ptr;
2772
2773                                 // the string got truncated!
2774                                 *truncated_path = 1;
2775                                 ptr = my_strrchr(path, '/');
2776                                 if (ptr) {
2777                                         *ptr = '\0';   // chop off the string at the last directory component
2778                                 }
2779                                 len = strlen(path) + 1;
2780                         }
2781                 }
2782         } else if (ret == 0) {
2783                 *truncated_path = 1;
2784         } else if (ret != 0) {
2785                 struct vnode *mydvp=dvp;
2786
2787                 if (ret != ENOSPC) {
2788                         printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
2789                                dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
2790                 }
2791                 *truncated_path = 1;
2792
2793                 do {
2794                         if (mydvp->v_parent != NULL) {
2795                                 mydvp = mydvp->v_parent;
2796                         } else if (mydvp->v_mount) {
2797                                 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
2798                                 break;
2799                         } else {
2800                                 // no parent and no mount point?  only thing is to punt and say "/" changed
2801                                 strlcpy(path, "/", _len);
2802                                 len = 2;
2803                                 mydvp = NULL;
2804                         }
2805
2806                         if (mydvp == NULL) {
2807                                 break;
2808                         }
2809
2810                         len = _len;
2811                         ret = vn_getpath(mydvp, path, &len);
2812                 } while (ret == ENOSPC);
2813         }
2814
2815         return len;
2816 }
2817
2818
2819 /*
2820  * Make a hard file link.
2821  *
2822  * Returns:     0                       Success
2823  *              EPERM
2824  *              EEXIST
2825  *              EXDEV
2826  *      namei:???
2827  *      vnode_authorize:???
2828  *      VNOP_LINK:???
2829  */
2830 /* ARGSUSED */
2831 int
2832 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
2833 {
2834         vnode_t vp, dvp, lvp;
2835         struct nameidata nd;
2836         vfs_context_t ctx = vfs_context_current();
2837         int error;
2838 #if CONFIG_FSE
2839         fse_info finfo;
2840 #endif
2841         int need_event, has_listeners;
2842         char *target_path = NULL;
2843         int truncated=0;
2844
2845         vp = dvp = lvp = NULLVP;
2846
2847         /* look up the object we are linking to */
2848         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2849                 UIO_USERSPACE, uap->path, ctx);
2850         error = namei(&nd);
2851         if (error)
2852                 return (error);
2853         vp = nd.ni_vp;
2854
2855         nameidone(&nd);
2856
2857         /*
2858          * Normally, linking to directories is not supported.
2859          * However, some file systems may have limited support.
2860          */
2861         if (vp->v_type == VDIR) {
2862                 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
2863                         error = EPERM;   /* POSIX */
2864                         goto out;
2865                 }
2866                 /* Linking to a directory requires ownership. */
2867                 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
2868                         struct vnode_attr dva;
2869
2870                         VATTR_INIT(&dva);
2871                         VATTR_WANTED(&dva, va_uid);
2872                         if (vnode_getattr(vp, &dva, ctx) != 0 ||
2873                             !VATTR_IS_SUPPORTED(&dva, va_uid) ||
2874                             (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
2875                                 error = EACCES;
2876                                 goto out;
2877                         }
2878                 }
2879         }
2880
2881         /* lookup the target node */
2882         nd.ni_cnd.cn_nameiop = CREATE;
2883         nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
2884         nd.ni_dirp = uap->link;
2885         error = namei(&nd);
2886         if (error != 0)
2887                 goto out;
2888         dvp = nd.ni_dvp;
2889         lvp = nd.ni_vp;
2890
2891 #if CONFIG_MACF
2892         if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
2893                 goto out2;
2894 #endif
2895
2896         /* or to anything that kauth doesn't want us to (eg. immutable items) */
2897         if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
2898                 goto out2;
2899
2900         /* target node must not exist */
2901         if (lvp != NULLVP) {
2902                 error = EEXIST;
2903                 goto out2;
2904         }
2905         /* cannot link across mountpoints */
2906         if (vnode_mount(vp) != vnode_mount(dvp)) {
2907                 error = EXDEV;
2908                 goto out2;
2909         }
2910
2911         /* authorize creation of the target note */
2912         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2913                 goto out2;
2914
2915         /* and finally make the link */
2916         error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
2917         if (error)
2918                 goto out2;
2919
2920 #if CONFIG_FSE
2921         need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2922 #else
2923         need_event = 0;
2924 #endif
2925         has_listeners = kauth_authorize_fileop_has_listeners();
2926
2927         if (need_event || has_listeners) {
2928                 char *link_to_path = NULL;
2929                 int len, link_name_len;
2930
2931                 /* build the path to the new link file */
2932                 GET_PATH(target_path);
2933                 if (target_path == NULL) {
2934                         error = ENOMEM;
2935                         goto out2;
2936                 }
2937
2938                 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
2939
2940                 if (has_listeners) {
2941                         /* build the path to file we are linking to */
2942                         GET_PATH(link_to_path);
2943                         if (link_to_path == NULL) {
2944                                 error = ENOMEM;
2945                                 goto out2;
2946                         }
2947
2948                         link_name_len = MAXPATHLEN;
2949                         vn_getpath(vp, link_to_path, &link_name_len);
2950
2951                         /*
2952                          * Call out to allow 3rd party notification of rename.
2953                          * Ignore result of kauth_authorize_fileop call.
2954                          */
2955                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
2956                                                (uintptr_t)link_to_path, (uintptr_t)target_path);
2957                         if (link_to_path != NULL) {
2958                                 RELEASE_PATH(link_to_path);
2959                         }
2960                 }
2961 #if CONFIG_FSE
2962                 if (need_event) {
2963                         /* construct fsevent */
2964                         if (get_fse_info(vp, &finfo, ctx) == 0) {
2965                                 if (truncated) {
2966                                         finfo.mode |= FSE_TRUNCATED_PATH;
2967                                 }
2968
2969                                 // build the path to the destination of the link
2970                                 add_fsevent(FSE_CREATE_FILE, ctx,
2971                                             FSE_ARG_STRING, len, target_path,
2972                                             FSE_ARG_FINFO, &finfo,
2973                                             FSE_ARG_DONE);
2974                         }
2975                         if (vp->v_parent) {
2976                             add_fsevent(FSE_STAT_CHANGED, ctx,
2977                                 FSE_ARG_VNODE, vp->v_parent,
2978                                 FSE_ARG_DONE);
2979                         }
2980                 }
2981 #endif
2982         }
2983 out2:
2984         /*
2985          * nameidone has to happen before we vnode_put(dvp)
2986          * since it may need to release the fs_nodelock on the dvp
2987          */
2988         nameidone(&nd);
2989         if (target_path != NULL) {
2990                 RELEASE_PATH(target_path);
2991         }
2992 out:
2993         if (lvp)
2994                 vnode_put(lvp);
2995         if (dvp)
2996                 vnode_put(dvp);
2997         vnode_put(vp);
2998         return (error);
2999 }
3000
3001 /*
3002  * Make a symbolic link.
3003  *
3004  * We could add support for ACLs here too...
3005  */
3006 /* ARGSUSED */
3007 int
3008 symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
3009 {
3010         struct vnode_attr va;
3011         char *path;
3012         int error;
3013         struct nameidata nd;
3014         vfs_context_t ctx = vfs_context_current();
3015         vnode_t vp, dvp;
3016         size_t dummy=0;
3017
3018         MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
3019         error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
3020         if (error)
3021                 goto out;
3022         AUDIT_ARG(text, path);  /* This is the link string */
3023
3024         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
3025                 UIO_USERSPACE, uap->link, ctx);
3026         error = namei(&nd);
3027         if (error)
3028                 goto out;
3029         dvp = nd.ni_dvp;
3030         vp = nd.ni_vp;
3031
3032         VATTR_INIT(&va);
3033         VATTR_SET(&va, va_type, VLNK);
3034         VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
3035 #if CONFIG_MACF
3036         error = mac_vnode_check_create(ctx,
3037                         dvp, &nd.ni_cnd, &va);
3038 #endif
3039         if (error != 0) {
3040             goto skipit;
3041         }
3042
3043         if (vp != NULL) {
3044             error = EEXIST;
3045             goto skipit;
3046         }
3047
3048         /* authorize */
3049         if (error == 0)
3050                 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
3051         /* get default ownership, etc. */
3052         if (error == 0)
3053                 error = vnode_authattr_new(dvp, &va, 0, ctx);
3054         if (error == 0)
3055                 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
3056
3057         /* do fallback attribute handling */
3058         if (error == 0)
3059                 error = vnode_setattr_fallback(vp, &va, ctx);
3060
3061         if (error == 0) {
3062                 int     update_flags = 0;
3063
3064                 if (vp == NULL) {
3065                         nd.ni_cnd.cn_nameiop = LOOKUP;
3066                         nd.ni_cnd.cn_flags = 0;
3067                         error = namei(&nd);
3068                         vp = nd.ni_vp;
3069
3070                         if (vp == NULL)
3071                                 goto skipit;
3072                 }
3073
3074 #if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
3075                 /* call out to allow 3rd party notification of rename.
3076                  * Ignore result of kauth_authorize_fileop call.
3077                  */
3078                 if (kauth_authorize_fileop_has_listeners() &&
3079                     namei(&nd) == 0) {
3080                         char *new_link_path = NULL;
3081                         int             len;
3082
3083                         /* build the path to the new link file */
3084                         new_link_path = get_pathbuff();
3085                         len = MAXPATHLEN;
3086                         vn_getpath(dvp, new_link_path, &len);
3087                         if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
3088                                 new_link_path[len - 1] = '/';
3089                                 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
3090                         }
3091
3092                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
3093                                            (uintptr_t)path, (uintptr_t)new_link_path);
3094                         if (new_link_path != NULL)
3095                                 release_pathbuff(new_link_path);
3096                 }
3097 #endif
3098                 // Make sure the name & parent pointers are hooked up
3099                 if (vp->v_name == NULL)
3100                         update_flags |= VNODE_UPDATE_NAME;
3101                 if (vp->v_parent == NULLVP)
3102                         update_flags |= VNODE_UPDATE_PARENT;
3103
3104                 if (update_flags)
3105                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3106
3107 #if CONFIG_FSE
3108                 add_fsevent(FSE_CREATE_FILE, ctx,
3109                             FSE_ARG_VNODE, vp,
3110                             FSE_ARG_DONE);
3111 #endif
3112         }
3113
3114 skipit:
3115         /*
3116          * nameidone has to happen before we vnode_put(dvp)
3117          * since it may need to release the fs_nodelock on the dvp
3118          */
3119         nameidone(&nd);
3120
3121         if (vp)
3122                 vnode_put(vp);
3123         vnode_put(dvp);
3124 out:
3125         FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
3126
3127         return (error);
3128 }
3129
3130 /*
3131  * Delete a whiteout from the filesystem.
3132  * XXX authorization not implmented for whiteouts
3133  */
3134 int
3135 undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
3136 {
3137         int error;
3138         struct nameidata nd;
3139         vfs_context_t ctx = vfs_context_current();
3140         vnode_t vp, dvp;
3141
3142         NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1,
3143                 UIO_USERSPACE, uap->path, ctx);
3144         error = namei(&nd);
3145         if (error)
3146                 return (error);
3147         dvp = nd.ni_dvp;
3148         vp = nd.ni_vp;
3149
3150         if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
3151                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
3152         } else
3153                 error = EEXIST;
3154
3155         /*
3156          * nameidone has to happen before we vnode_put(dvp)
3157          * since it may need to release the fs_nodelock on the dvp
3158          */
3159         nameidone(&nd);
3160
3161         if (vp)
3162                 vnode_put(vp);
3163         vnode_put(dvp);
3164
3165         return (error);
3166 }
3167
3168
3169 /*
3170  * Delete a name from the filesystem.
3171  */
3172 /* ARGSUSED */
3173 int
3174 unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
3175 {
3176         vnode_t vp, dvp;
3177         int error;
3178         struct componentname *cnp;
3179         char  *path = NULL;
3180         int  len=0;
3181 #if CONFIG_FSE
3182         fse_info  finfo;
3183 #endif
3184         int flags = 0;
3185         int need_event = 0;
3186         int has_listeners = 0;
3187         int truncated_path=0;
3188 #if NAMEDRSRCFORK
3189         /* unlink or delete is allowed on rsrc forks and named streams */
3190         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3191 #endif
3192
3193         ndp->ni_cnd.cn_flags |= LOCKPARENT;
3194         cnp = &ndp->ni_cnd;
3195
3196         error = namei(ndp);
3197         if (error)
3198                 return (error);
3199
3200         dvp = ndp->ni_dvp;
3201         vp = ndp->ni_vp;
3202
3203         /* With Carbon delete semantics, busy files cannot be deleted */
3204         if (nodelbusy) {
3205                 flags |= VNODE_REMOVE_NODELETEBUSY;
3206         }
3207
3208         /*
3209          * Normally, unlinking of directories is not supported.
3210          * However, some file systems may have limited support.
3211          */
3212         if ((vp->v_type == VDIR) &&
3213             !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
3214                 error = EPERM;  /* POSIX */
3215         }
3216
3217         /*
3218          * The root of a mounted filesystem cannot be deleted.
3219          */
3220         if (vp->v_flag & VROOT) {
3221                 error = EBUSY;
3222         }
3223         if (error)
3224                 goto out;
3225
3226
3227         /* authorize the delete operation */
3228 #if CONFIG_MACF
3229         if (!error)
3230                 error = mac_vnode_check_unlink(ctx,
3231                     dvp, vp, cnp);
3232 #endif /* MAC */
3233         if (!error)
3234                 error = vnode_authorize(vp, ndp->ni_dvp, KAUTH_VNODE_DELETE, ctx);
3235         if (error)
3236                 goto out;
3237
3238 #if CONFIG_FSE
3239         need_event = need_fsevent(FSE_DELETE, dvp);
3240         if (need_event) {
3241                 if ((vp->v_flag & VISHARDLINK) == 0) {
3242                         get_fse_info(vp, &finfo, ctx);
3243                 }
3244         }
3245 #endif
3246         has_listeners = kauth_authorize_fileop_has_listeners();
3247         if (need_event || has_listeners) {
3248                 GET_PATH(path);
3249                 if (path == NULL) {
3250                         error = ENOMEM;
3251                         goto out;
3252                 }
3253
3254                 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
3255         }
3256
3257 #if NAMEDRSRCFORK
3258         if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
3259                 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
3260         else
3261 #endif
3262                 error = VNOP_REMOVE(dvp, vp, &ndp->ni_cnd, flags, ctx);
3263
3264         /*
3265          * Call out to allow 3rd party notification of delete.
3266          * Ignore result of kauth_authorize_fileop call.
3267          */
3268         if (!error) {
3269                 if (has_listeners) {
3270                         kauth_authorize_fileop(vfs_context_ucred(ctx),
3271                                 KAUTH_FILEOP_DELETE,
3272                                 (uintptr_t)vp,
3273                                 (uintptr_t)path);
3274                 }
3275
3276                 if (vp->v_flag & VISHARDLINK) {
3277                     //
3278                     // if a hardlink gets deleted we want to blow away the
3279                     // v_parent link because the path that got us to this
3280                     // instance of the link is no longer valid.  this will
3281                     // force the next call to get the path to ask the file
3282                     // system instead of just following the v_parent link.
3283                     //
3284                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
3285                 }
3286
3287 #if CONFIG_FSE
3288                 if (need_event) {
3289                         if (vp->v_flag & VISHARDLINK) {
3290                                 get_fse_info(vp, &finfo, ctx);
3291                         }
3292                         if (truncated_path) {
3293                                 finfo.mode |= FSE_TRUNCATED_PATH;
3294                         }
3295                         add_fsevent(FSE_DELETE, ctx,
3296                                                 FSE_ARG_STRING, len, path,
3297                                                 FSE_ARG_FINFO, &finfo,
3298                                                 FSE_ARG_DONE);
3299                 }
3300 #endif
3301         }
3302         if (path != NULL)
3303                 RELEASE_PATH(path);
3304
3305         /*
3306          * nameidone has to happen before we vnode_put(dvp)
3307          * since it may need to release the fs_nodelock on the dvp
3308          */
3309 out:
3310 #if NAMEDRSRCFORK
3311         /* recycle the deleted rsrc fork vnode to force a reclaim, which
3312          * will cause its shadow file to go away if necessary.
3313          */
3314          if ((vnode_isnamedstream(ndp->ni_vp)) &&
3315                 (ndp->ni_vp->v_parent != NULLVP) &&
3316                 vnode_isshadow(ndp->ni_vp)) {
3317                         vnode_recycle(ndp->ni_vp);
3318          }
3319 #endif
3320         nameidone(ndp);
3321         vnode_put(dvp);
3322         vnode_put(vp);
3323         return (error);
3324 }
3325
3326 /*
3327  * Delete a name from the filesystem using POSIX semantics.
3328  */
3329 int
3330 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
3331 {
3332         struct nameidata nd;
3333         vfs_context_t ctx = vfs_context_current();
3334
3335         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3336         return unlink1(ctx, &nd, 0);
3337 }
3338
3339 /*
3340  * Delete a name from the filesystem using Carbon semantics.
3341  */
3342 int
3343 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
3344 {
3345         struct nameidata nd;
3346         vfs_context_t ctx = vfs_context_current();
3347
3348         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3349         return unlink1(ctx, &nd, 1);
3350 }
3351
3352 /*
3353  * Reposition read/write file offset.
3354  */
3355 int
3356 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
3357 {
3358         struct fileproc *fp;
3359         vnode_t vp;
3360         struct vfs_context *ctx;
3361         off_t offset = uap->offset, file_size;
3362         int error;
3363
3364         if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
3365                 if (error == ENOTSUP)
3366                         return (ESPIPE);
3367                 return (error);
3368         }
3369         if (vnode_isfifo(vp)) {
3370                 file_drop(uap->fd);
3371                 return(ESPIPE);
3372         }
3373
3374
3375         ctx = vfs_context_current();
3376 #if CONFIG_MACF
3377         if (uap->whence == L_INCR && uap->offset == 0)
3378                 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
3379                     fp->f_fglob);
3380         else
3381                 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
3382                     fp->f_fglob);
3383         if (error) {
3384                 file_drop(uap->fd);
3385                 return (error);
3386         }
3387 #endif
3388         if ( (error = vnode_getwithref(vp)) ) {
3389                 file_drop(uap->fd);
3390                 return(error);
3391         }
3392
3393         switch (uap->whence) {
3394         case L_INCR:
3395                 offset += fp->f_fglob->fg_offset;
3396                 break;
3397         case L_XTND:
3398                 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
3399                         break;
3400                 offset += file_size;
3401                 break;
3402         case L_SET:
3403                 break;
3404         default:
3405                 error = EINVAL;
3406         }
3407         if (error == 0) {
3408                 if (uap->offset > 0 && offset < 0) {
3409                         /* Incremented/relative move past max size */
3410                         error = EOVERFLOW;
3411                 } else {
3412                         /*
3413                          * Allow negative offsets on character devices, per
3414                          * POSIX 1003.1-2001.  Most likely for writing disk
3415                          * labels.
3416                          */
3417                         if (offset < 0 && vp->v_type != VCHR) {
3418                                 /* Decremented/relative move before start */
3419                                 error = EINVAL;
3420                         } else {
3421                                 /* Success */
3422                                 fp->f_fglob->fg_offset = offset;
3423                                 *retval = fp->f_fglob->fg_offset;
3424                         }
3425                 }
3426         }
3427
3428         /*
3429          * An lseek can affect whether data is "available to read."  Use
3430          * hint of NOTE_NONE so no EVFILT_VNODE events fire
3431          */
3432         post_event_if_success(vp, error, NOTE_NONE);
3433         (void)vnode_put(vp);
3434         file_drop(uap->fd);
3435         return (error);
3436 }
3437
3438
3439 /*
3440  * Check access permissions.
3441  *
3442  * Returns:     0                       Success
3443  *              vnode_authorize:???
3444  */
3445 static int
3446 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
3447 {
3448         kauth_action_t action;
3449         int error;
3450
3451         /*
3452          * If just the regular access bits, convert them to something
3453          * that vnode_authorize will understand.
3454          */
3455         if (!(uflags & _ACCESS_EXTENDED_MASK)) {
3456                 action = 0;
3457                 if (uflags & R_OK)
3458                         action |= KAUTH_VNODE_READ_DATA;        /* aka KAUTH_VNODE_LIST_DIRECTORY */
3459                 if (uflags & W_OK) {
3460                         if (vnode_isdir(vp)) {
3461                                 action |= KAUTH_VNODE_ADD_FILE |
3462                                     KAUTH_VNODE_ADD_SUBDIRECTORY;
3463                                 /* might want delete rights here too */
3464                         } else {
3465                                 action |= KAUTH_VNODE_WRITE_DATA;
3466                         }
3467                 }
3468                 if (uflags & X_OK) {
3469                         if (vnode_isdir(vp)) {
3470                                 action |= KAUTH_VNODE_SEARCH;
3471                         } else {
3472                                 action |= KAUTH_VNODE_EXECUTE;
3473                         }
3474                 }
3475         } else {
3476                 /* take advantage of definition of uflags */
3477                 action = uflags >> 8;
3478         }
3479
3480 #if CONFIG_MACF
3481         error = mac_vnode_check_access(ctx, vp, uflags);
3482         if (error)
3483                 return (error);
3484 #endif /* MAC */
3485
3486         /* action == 0 means only check for existence */
3487         if (action != 0) {
3488                 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
3489         } else {
3490                 error = 0;
3491         }
3492
3493         return(error);
3494 }
3495
3496
3497
3498 /*
3499  * access_extended: Check access permissions in bulk.
3500  *
3501  * Description: uap->entries            Pointer to an array of accessx
3502  *                                      descriptor structs, plus one or
3503  *                                      more NULL terminated strings (see
3504  *                                      "Notes" section below).
3505  *              uap->size               Size of the area pointed to by
3506  *                                      uap->entries.
3507  *              uap->results            Pointer to the results array.
3508  *
3509  * Returns:     0                       Success
3510  *              ENOMEM                  Insufficient memory
3511  *              EINVAL                  Invalid arguments
3512  *              namei:EFAULT            Bad address
3513  *              namei:ENAMETOOLONG      Filename too long
3514  *              namei:ENOENT            No such file or directory
3515  *              namei:ELOOP             Too many levels of symbolic links
3516  *              namei:EBADF             Bad file descriptor
3517  *              namei:ENOTDIR           Not a directory
3518  *              namei:???
3519  *              access1:
3520  *
3521  * Implicit returns:
3522  *              uap->results            Array contents modified
3523  *
3524  * Notes:       The uap->entries are structured as an arbitrary length array
3525  *              of accessx descriptors, followed by one or more NULL terminated
3526  *              strings
3527  *
3528  *                      struct accessx_descriptor[0]
3529  *                      ...
3530  *                      struct accessx_descriptor[n]
3531  *                      char name_data[0];
3532  *
3533  *              We determine the entry count by walking the buffer containing
3534  *              the uap->entries argument descriptor.  For each descriptor we
3535  *              see, the valid values for the offset ad_name_offset will be
3536  *              in the byte range:
3537  *
3538  *                      [ uap->entries + sizeof(struct accessx_descriptor) ]
3539  *                                              to
3540  *                              [ uap->entries + uap->size - 2 ]
3541  *
3542  *              since we must have at least one string, and the string must
3543  *              be at least one character plus the NULL terminator in length.
3544  *
3545  * XXX:         Need to support the check-as uid argument
3546  */
3547 int
3548 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
3549 {
3550         struct accessx_descriptor *input = NULL;
3551         errno_t *result = NULL;
3552         errno_t error = 0;
3553         int wantdelete = 0;
3554         unsigned int desc_max, desc_actual, i, j;
3555         struct vfs_context context;
3556         struct nameidata nd;
3557         int niopts;
3558         vnode_t vp = NULL;
3559         vnode_t dvp = NULL;
3560 #define ACCESSX_MAX_DESCR_ON_STACK 10
3561         struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
3562
3563         context.vc_ucred = NULL;
3564
3565         /*
3566          * Validate parameters; if valid, copy the descriptor array and string
3567          * arguments into local memory.  Before proceeding, the following
3568          * conditions must have been met:
3569          *
3570          * o    The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
3571          * o    There must be sufficient room in the request for at least one
3572          *      descriptor and a one yte NUL terminated string.
3573          * o    The allocation of local storage must not fail.
3574          */
3575         if (uap->size > ACCESSX_MAX_TABLESIZE)
3576                 return(ENOMEM);
3577         if (uap->size < (sizeof(struct accessx_descriptor) + 2))
3578                 return(EINVAL);
3579         if (uap->size <= sizeof (stack_input)) {
3580                 input = stack_input;
3581         } else {
3582         MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
3583         if (input == NULL) {
3584                 error = ENOMEM;
3585                 goto out;
3586         }
3587         }
3588         error = copyin(uap->entries, input, uap->size);
3589         if (error)
3590                 goto out;
3591
3592         AUDIT_ARG(opaque, input, uap->size);
3593
3594         /*
3595          * Force NUL termination of the copyin buffer to avoid nami() running
3596          * off the end.  If the caller passes us bogus data, they may get a
3597          * bogus result.
3598          */
3599         ((char *)input)[uap->size - 1] = 0;
3600
3601         /*
3602          * Access is defined as checking against the process' real identity,
3603          * even if operations are checking the effective identity.  This
3604          * requires that we use a local vfs context.
3605          */
3606         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3607         context.vc_thread = current_thread();
3608
3609         /*
3610          * Find out how many entries we have, so we can allocate the result
3611          * array by walking the list and adjusting the count downward by the
3612          * earliest string offset we see.
3613          */
3614         desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
3615         desc_actual = desc_max;
3616         for (i = 0; i < desc_actual; i++) {
3617                 /*
3618                  * Take the offset to the name string for this entry and
3619                  * convert to an input array index, which would be one off
3620                  * the end of the array if this entry was the lowest-addressed
3621                  * name string.
3622                  */
3623                 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
3624
3625                 /*
3626                  * An offset greater than the max allowable offset is an error.
3627                  * It is also an error for any valid entry to point
3628                  * to a location prior to the end of the current entry, if
3629                  * it's not a reference to the string of the previous entry.
3630                  */
3631                 if (j > desc_max || (j != 0 && j <= i)) {
3632                         error = EINVAL;
3633                         goto out;
3634                 }
3635
3636                 /*
3637                  * An offset of 0 means use the previous descriptor's offset;
3638                  * this is used to chain multiple requests for the same file
3639                  * to avoid multiple lookups.
3640                  */
3641                 if (j == 0) {
3642                         /* This is not valid for the first entry */
3643                         if (i == 0) {
3644                                 error = EINVAL;
3645                                 goto out;
3646                         }
3647                         continue;
3648                 }
3649
3650                 /*
3651                  * If the offset of the string for this descriptor is before
3652                  * what we believe is the current actual last descriptor,
3653                  * then we need to adjust our estimate downward; this permits
3654                  * the string table following the last descriptor to be out
3655                  * of order relative to the descriptor list.
3656                  */
3657                 if (j < desc_actual)
3658                         desc_actual = j;
3659         }
3660
3661         /*
3662          * We limit the actual number of descriptors we are willing to process
3663          * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
3664          * requested does not exceed this limit,
3665          */
3666         if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
3667                 error = ENOMEM;
3668                 goto out;
3669         }
3670         MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
3671         if (result == NULL) {
3672                 error = ENOMEM;
3673                 goto out;
3674         }
3675
3676         /*
3677          * Do the work by iterating over the descriptor entries we know to
3678          * at least appear to contain valid data.
3679          */
3680         error = 0;
3681         for (i = 0; i < desc_actual; i++) {
3682                 /*
3683                  * If the ad_name_offset is 0, then we use the previous
3684                  * results to make the check; otherwise, we are looking up
3685                  * a new file name.
3686                  */
3687                 if (input[i].ad_name_offset != 0) {
3688                         /* discard old vnodes */
3689                         if (vp) {
3690                                 vnode_put(vp);
3691                                 vp = NULL;
3692                         }
3693                         if (dvp) {
3694                                 vnode_put(dvp);
3695                                 dvp = NULL;
3696                         }
3697
3698                         /*
3699                          * Scan forward in the descriptor list to see if we
3700                          * need the parent vnode.  We will need it if we are
3701                          * deleting, since we must have rights  to remove
3702                          * entries in the parent directory, as well as the
3703                          * rights to delete the object itself.
3704                          */
3705                         wantdelete = input[i].ad_flags & _DELETE_OK;
3706                         for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
3707                                 if (input[j].ad_flags & _DELETE_OK)
3708                                         wantdelete = 1;
3709
3710                         niopts = FOLLOW | AUDITVNPATH1;
3711
3712                         /* need parent for vnode_authorize for deletion test */
3713                         if (wantdelete)
3714                                 niopts |= WANTPARENT;
3715
3716                         /* do the lookup */
3717                         NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset), &context);
3718                         error = namei(&nd);
3719                         if (!error) {
3720                                 vp = nd.ni_vp;
3721                                 if (wantdelete)
3722                                         dvp = nd.ni_dvp;
3723                         }
3724                         nameidone(&nd);
3725                 }
3726
3727                 /*
3728                  * Handle lookup errors.
3729                  */
3730                 switch(error) {
3731                 case ENOENT:
3732                 case EACCES:
3733                 case EPERM:
3734                 case ENOTDIR:
3735                         result[i] = error;
3736                         break;
3737                 case 0:
3738                         /* run this access check */
3739                         result[i] = access1(vp, dvp, input[i].ad_flags, &context);
3740                         break;
3741                 default:
3742                         /* fatal lookup error */
3743
3744                         goto out;
3745                 }
3746         }
3747
3748         AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
3749
3750         /* copy out results */
3751         error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
3752
3753 out:
3754         if (input && input != stack_input)
3755                 FREE(input, M_TEMP);
3756         if (result)
3757                 FREE(result, M_TEMP);
3758         if (vp)
3759                 vnode_put(vp);
3760         if (dvp)
3761                 vnode_put(dvp);
3762         if (IS_VALID_CRED(context.vc_ucred))
3763                 kauth_cred_unref(&context.vc_ucred);
3764         return(error);
3765 }
3766
3767
3768 /*
3769  * Returns:     0                       Success
3770  *              namei:EFAULT            Bad address
3771  *              namei:ENAMETOOLONG      Filename too long
3772  *              namei:ENOENT            No such file or directory
3773  *              namei:ELOOP             Too many levels of symbolic links
3774  *              namei:EBADF             Bad file descriptor
3775  *              namei:ENOTDIR           Not a directory
3776  *              namei:???
3777  *              access1:
3778  */
3779 int
3780 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
3781 {
3782         int error;
3783         struct nameidata nd;
3784         int niopts;
3785         struct vfs_context context;
3786 #if NAMEDRSRCFORK
3787         int is_namedstream = 0;
3788 #endif
3789
3790         /*
3791          * Access is defined as checking against the process'
3792          * real identity, even if operations are checking the
3793          * effective identity.  So we need to tweak the credential
3794          * in the context.
3795          */
3796         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3797         context.vc_thread = current_thread();
3798
3799         niopts = FOLLOW | AUDITVNPATH1;
3800         /* need parent for vnode_authorize for deletion test */
3801         if (uap->flags & _DELETE_OK)
3802                 niopts |= WANTPARENT;
3803         NDINIT(&nd, LOOKUP, niopts, UIO_USERSPACE, uap->path, &context);
3804
3805 #if NAMEDRSRCFORK
3806         /* access(F_OK) calls are allowed for resource forks. */
3807         if (uap->flags == F_OK)
3808                 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3809 #endif
3810         error = namei(&nd);
3811         if (error)
3812                 goto out;
3813
3814 #if NAMEDRSRCFORK
3815         /* Grab reference on the shadow stream file vnode to
3816          * force an inactive on release which will mark it
3817          * for recycle.
3818          */
3819         if (vnode_isnamedstream(nd.ni_vp) &&
3820             (nd.ni_vp->v_parent != NULLVP) &&
3821             vnode_isshadow(nd.ni_vp)) {
3822                 is_namedstream = 1;
3823                 vnode_ref(nd.ni_vp);
3824         }
3825 #endif
3826
3827         error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
3828
3829 #if NAMEDRSRCFORK
3830         if (is_namedstream) {
3831                 vnode_rele(nd.ni_vp);
3832         }
3833 #endif
3834
3835         vnode_put(nd.ni_vp);
3836         if (uap->flags & _DELETE_OK)
3837                 vnode_put(nd.ni_dvp);
3838         nameidone(&nd);
3839
3840 out:
3841         kauth_cred_unref(&context.vc_ucred);
3842         return(error);
3843 }
3844
3845
3846 /*
3847  * Returns:     0                       Success
3848  *              EFAULT
3849  *      copyout:EFAULT
3850  *      namei:???
3851  *      vn_stat:???
3852  */
3853 static int
3854 stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3855 {
3856         union {
3857                 struct stat sb;
3858                 struct stat64 sb64;
3859         } source;
3860         union {
3861                 struct user64_stat user64_sb;
3862                 struct user32_stat user32_sb;
3863                 struct user64_stat64 user64_sb64;
3864                 struct user32_stat64 user32_sb64;
3865         } dest;
3866         caddr_t sbp;
3867         int error, my_size;
3868         kauth_filesec_t fsec;
3869         size_t xsecurity_bufsize;
3870         void * statptr;
3871
3872 #if NAMEDRSRCFORK
3873         int is_namedstream = 0;
3874         /* stat calls are allowed for resource forks. */
3875         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3876 #endif
3877         error = namei(ndp);
3878         if (error)
3879                 return (error);
3880         fsec = KAUTH_FILESEC_NONE;
3881
3882         statptr = (void *)&source;
3883
3884 #if NAMEDRSRCFORK
3885         /* Grab reference on the shadow stream file vnode to
3886          * force an inactive on release which will mark it
3887          * for recycle.
3888          */
3889         if (vnode_isnamedstream(ndp->ni_vp) &&
3890             (ndp->ni_vp->v_parent != NULLVP) &&
3891             vnode_isshadow(ndp->ni_vp)) {
3892                 is_namedstream = 1;
3893                 vnode_ref(ndp->ni_vp);
3894         }
3895 #endif
3896
3897         error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
3898
3899 #if NAMEDRSRCFORK
3900         if (is_namedstream) {
3901                 vnode_rele(ndp->ni_vp);
3902         }
3903 #endif
3904         vnode_put(ndp->ni_vp);
3905         nameidone(ndp);
3906
3907         if (error)
3908                 return (error);
3909         /* Zap spare fields */
3910         if (isstat64 != 0) {
3911                 source.sb64.st_lspare = 0;
3912                 source.sb64.st_qspare[0] = 0LL;
3913                 source.sb64.st_qspare[1] = 0LL;
3914                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3915                         munge_user64_stat64(&source.sb64, &dest.user64_sb64);
3916                         my_size = sizeof(dest.user64_sb64);
3917                         sbp = (caddr_t)&dest.user64_sb64;
3918                 } else {
3919                         munge_user32_stat64(&source.sb64, &dest.user32_sb64);
3920                         my_size = sizeof(dest.user32_sb64);
3921                         sbp = (caddr_t)&dest.user32_sb64;
3922                 }
3923                 /*
3924                  * Check if we raced (post lookup) against the last unlink of a file.
3925                  */
3926                 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
3927                         source.sb64.st_nlink = 1;
3928                 }
3929         } else {
3930                 source.sb.st_lspare = 0;
3931                 source.sb.st_qspare[0] = 0LL;
3932                 source.sb.st_qspare[1] = 0LL;
3933                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3934                         munge_user64_stat(&source.sb, &dest.user64_sb);
3935                         my_size = sizeof(dest.user64_sb);
3936                         sbp = (caddr_t)&dest.user64_sb;
3937                 } else {
3938                         munge_user32_stat(&source.sb, &dest.user32_sb);
3939                         my_size = sizeof(dest.user32_sb);
3940                         sbp = (caddr_t)&dest.user32_sb;
3941                 }
3942
3943                 /*
3944                  * Check if we raced (post lookup) against the last unlink of a file.
3945                  */
3946                 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
3947                         source.sb.st_nlink = 1;
3948                 }
3949         }
3950         if ((error = copyout(sbp, ub, my_size)) != 0)
3951                 goto out;
3952
3953         /* caller wants extended security information? */
3954         if (xsecurity != USER_ADDR_NULL) {
3955
3956                 /* did we get any? */
3957                 if (fsec == KAUTH_FILESEC_NONE) {
3958                         if (susize(xsecurity_size, 0) != 0) {
3959                                 error = EFAULT;
3960                                 goto out;
3961                         }
3962                 } else {
3963                         /* find the user buffer size */
3964                         xsecurity_bufsize = fusize(xsecurity_size);
3965
3966                         /* copy out the actual data size */
3967                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3968                                 error = EFAULT;
3969                                 goto out;
3970                         }
3971
3972                         /* if the caller supplied enough room, copy out to it */
3973                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
3974                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3975                 }
3976         }
3977 out:
3978         if (fsec != KAUTH_FILESEC_NONE)
3979                 kauth_filesec_free(fsec);
3980         return (error);
3981 }
3982
3983 /*
3984  * Get file status; this version follows links.
3985  *
3986  * Returns:     0                       Success
3987  *      stat2:???                       [see stat2() in this file]
3988  */
3989 static int
3990 stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3991 {
3992         struct nameidata nd;
3993         vfs_context_t ctx = vfs_context_current();
3994
3995         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
3996             UIO_USERSPACE, path, ctx);
3997         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
3998 }
3999
4000 /*
4001  * stat_extended: Get file status; with extended security (ACL).
4002  *
4003  * Parameters:    p                       (ignored)
4004  *                uap                     User argument descriptor (see below)
4005  *                retval                  (ignored)
4006  *
4007  * Indirect:      uap->path               Path of file to get status from
4008  *                uap->ub                 User buffer (holds file status info)
4009  *                uap->xsecurity          ACL to get (extended security)
4010  *                uap->xsecurity_size     Size of ACL
4011  *
4012  * Returns:        0                      Success
4013  *                !0                      errno value
4014  *
4015  */
4016 int
4017 stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval)
4018 {
4019         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4020 }
4021
4022 /*
4023  * Returns:     0                       Success
4024  *      stat1:???                       [see stat1() in this file]
4025  */
4026 int
4027 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
4028 {
4029         return(stat1(uap->path, uap->ub, 0, 0, 0));
4030 }
4031
4032 int
4033 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
4034 {
4035         return(stat1(uap->path, uap->ub, 0, 0, 1));
4036 }
4037
4038 /*
4039  * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
4040  *
4041  * Parameters:    p                       (ignored)
4042  *                uap                     User argument descriptor (see below)
4043  *                retval                  (ignored)
4044  *
4045  * Indirect:      uap->path               Path of file to get status from
4046  *                uap->ub                 User buffer (holds file status info)
4047  *                uap->xsecurity          ACL to get (extended security)
4048  *                uap->xsecurity_size     Size of ACL
4049  *
4050  * Returns:        0                      Success
4051  *                !0                      errno value
4052  *
4053  */
4054 int
4055 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
4056 {
4057         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4058 }
4059 /*
4060  * Get file status; this version does not follow links.
4061  */
4062 static int
4063 lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4064 {
4065         struct nameidata nd;
4066         vfs_context_t ctx = vfs_context_current();
4067
4068         NDINIT(&nd, LOOKUP, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
4069             UIO_USERSPACE, path, ctx);
4070
4071         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4072 }
4073
4074 /*
4075  * lstat_extended: Get file status; does not follow links; with extended security (ACL).
4076  *
4077  * Parameters:    p                       (ignored)
4078  *                uap                     User argument descriptor (see below)
4079  *                retval                  (ignored)
4080  *
4081  * Indirect:      uap->path               Path of file to get status from
4082  *                uap->ub                 User buffer (holds file status info)
4083  *                uap->xsecurity          ACL to get (extended security)
4084  *                uap->xsecurity_size     Size of ACL
4085  *
4086  * Returns:        0                      Success
4087  *                !0                      errno value
4088  *
4089  */
4090 int
4091 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
4092 {
4093         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4094 }
4095
4096 int
4097 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
4098 {
4099         return(lstat1(uap->path, uap->ub, 0, 0, 0));
4100 }
4101
4102 int
4103 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
4104 {
4105         return(lstat1(uap->path, uap->ub, 0, 0, 1));
4106 }
4107
4108 /*
4109  * lstat64_extended: Get file status; can handle large inode numbers; does not
4110  * follow links; with extended security (ACL).
4111  *
4112  * Parameters:    p                       (ignored)
4113  *                uap                     User argument descriptor (see below)
4114  *                retval                  (ignored)
4115  *
4116  * Indirect:      uap->path               Path of file to get status from
4117  *                uap->ub                 User buffer (holds file status info)
4118  *                uap->xsecurity          ACL to get (extended security)
4119  *                uap->xsecurity_size     Size of ACL
4120  *
4121  * Returns:        0                      Success
4122  *                !0                      errno value
4123  *
4124  */
4125 int
4126 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
4127 {
4128         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4129 }
4130
4131 /*
4132  * Get configurable pathname variables.
4133  *
4134  * Returns:     0                       Success
4135  *      namei:???
4136  *      vn_pathconf:???
4137  *
4138  * Notes:       Global implementation  constants are intended to be
4139  *              implemented in this function directly; all other constants
4140  *              are per-FS implementation, and therefore must be handled in
4141  *              each respective FS, instead.
4142  *
4143  * XXX We implement some things globally right now that should actually be
4144  * XXX per-FS; we will need to deal with this at some point.
4145  */
4146 /* ARGSUSED */
4147 int
4148 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
4149 {
4150         int error;
4151         struct nameidata nd;
4152         vfs_context_t ctx = vfs_context_current();
4153
4154         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4155                 UIO_USERSPACE, uap->path, ctx);
4156         error = namei(&nd);
4157         if (error)
4158                 return (error);
4159
4160         error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
4161
4162         vnode_put(nd.ni_vp);
4163         nameidone(&nd);
4164         return (error);
4165 }
4166
4167 /*
4168  * Return target name of a symbolic link.
4169  */
4170 /* ARGSUSED */
4171 int
4172 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
4173 {
4174         vnode_t vp;
4175         uio_t auio;
4176         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
4177         int error;
4178         struct nameidata nd;
4179         vfs_context_t ctx = vfs_context_current();
4180         char uio_buf[ UIO_SIZEOF(1) ];
4181
4182         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1,
4183                 UIO_USERSPACE, uap->path, ctx);
4184         error = namei(&nd);
4185         if (error)
4186                 return (error);
4187         vp = nd.ni_vp;
4188
4189         nameidone(&nd);
4190
4191         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
4192                                                                   &uio_buf[0], sizeof(uio_buf));
4193         uio_addiov(auio, uap->buf, uap->count);
4194         if (vp->v_type != VLNK)
4195                 error = EINVAL;
4196         else {
4197 #if CONFIG_MACF
4198                 error = mac_vnode_check_readlink(ctx,
4199                     vp);
4200 #endif
4201                 if (error == 0)
4202                         error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
4203                 if (error == 0)
4204                         error = VNOP_READLINK(vp, auio, ctx);
4205         }
4206         vnode_put(vp);
4207
4208         /* Safe: uio_resid() is bounded above by "count", and "count" is an int  */
4209         *retval = uap->count - (int)uio_resid(auio);
4210         return (error);
4211 }
4212
4213 /*
4214  * Change file flags.
4215  */
4216 static int
4217 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
4218 {
4219         struct vnode_attr va;
4220         kauth_action_t action;
4221         int error;
4222
4223         VATTR_INIT(&va);
4224         VATTR_SET(&va, va_flags, flags);
4225
4226 #if CONFIG_MACF
4227         error = mac_vnode_check_setflags(ctx, vp, flags);
4228         if (error)
4229                 goto out;
4230 #endif
4231
4232         /* request authorisation, disregard immutability */
4233         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4234                 goto out;
4235         /*
4236          * Request that the auth layer disregard those file flags it's allowed to when
4237          * authorizing this operation; we need to do this in order to be able to
4238          * clear immutable flags.
4239          */
4240         if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
4241                 goto out;
4242         error = vnode_setattr(vp, &va, ctx);
4243
4244         if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
4245                 error = ENOTSUP;
4246         }
4247 out:
4248         vnode_put(vp);
4249         return(error);
4250 }
4251
4252 /*
4253  * Change flags of a file given a path name.
4254  */
4255 /* ARGSUSED */
4256 int
4257 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
4258 {
4259         vnode_t vp;
4260         vfs_context_t ctx = vfs_context_current();
4261         int error;
4262         struct nameidata nd;
4263
4264         AUDIT_ARG(fflags, uap->flags);
4265         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4266                 UIO_USERSPACE, uap->path, ctx);
4267         error = namei(&nd);
4268         if (error)
4269                 return (error);
4270         vp = nd.ni_vp;
4271         nameidone(&nd);
4272
4273         error = chflags1(vp, uap->flags, ctx);
4274
4275         return(error);
4276 }
4277
4278 /*
4279  * Change flags of a file given a file descriptor.
4280  */
4281 /* ARGSUSED */
4282 int
4283 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
4284 {
4285         vnode_t vp;
4286         int error;
4287
4288         AUDIT_ARG(fd, uap->fd);
4289         AUDIT_ARG(fflags, uap->flags);
4290         if ( (error = file_vnode(uap->fd, &vp)) )
4291                 return (error);
4292
4293         if ((error = vnode_getwithref(vp))) {
4294                 file_drop(uap->fd);
4295                 return(error);
4296         }
4297
4298         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4299
4300         error = chflags1(vp, uap->flags, vfs_context_current());
4301
4302         file_drop(uap->fd);
4303         return (error);
4304 }
4305
4306 /*
4307  * Change security information on a filesystem object.
4308  *
4309  * Returns:     0                       Success
4310  *              EPERM                   Operation not permitted
4311  *              vnode_authattr:???      [anything vnode_authattr can return]
4312  *              vnode_authorize:???     [anything vnode_authorize can return]
4313  *              vnode_setattr:???       [anything vnode_setattr can return]
4314  *
4315  * Notes:       If vnode_authattr or vnode_authorize return EACCES, it will be
4316  *              translated to EPERM before being returned.
4317  */
4318 static int
4319 chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
4320 {
4321         kauth_action_t action;
4322         int error;
4323
4324         AUDIT_ARG(mode, vap->va_mode);
4325         /* XXX audit new args */
4326
4327 #if NAMEDSTREAMS
4328         /* chmod calls are not allowed for resource forks. */
4329         if (vp->v_flag & VISNAMEDSTREAM) {
4330                 return (EPERM);
4331         }
4332 #endif
4333
4334 #if CONFIG_MACF
4335         error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode);
4336         if (error)
4337                 return (error);
4338 #endif
4339
4340         /* make sure that the caller is allowed to set this security information */
4341         if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
4342             ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4343                 if (error == EACCES)
4344                         error = EPERM;
4345                 return(error);
4346         }
4347
4348         error = vnode_setattr(vp, vap, ctx);
4349
4350         return (error);
4351 }
4352
4353
4354 /*
4355  * Change mode of a file given a path name.
4356  *
4357  * Returns:     0                       Success
4358  *              namei:???               [anything namei can return]
4359  *              chmod2:???              [anything chmod2 can return]
4360  */
4361 static int
4362 chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
4363 {
4364         struct nameidata nd;
4365         int error;
4366
4367         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4368                 UIO_USERSPACE, path, ctx);
4369         if ((error = namei(&nd)))
4370                 return (error);
4371         error = chmod2(ctx, nd.ni_vp, vap);
4372         vnode_put(nd.ni_vp);
4373         nameidone(&nd);
4374         return(error);
4375 }
4376
4377 /*
4378  * chmod_extended: Change the mode of a file given a path name; with extended
4379  * argument list (including extended security (ACL)).
4380  *
4381  * Parameters:  p                       Process requesting the open
4382  *              uap                     User argument descriptor (see below)
4383  *              retval                  (ignored)
4384  *
4385  * Indirect:    uap->path               Path to object (same as 'chmod')
4386  *              uap->uid                UID to set
4387  *              uap->gid                GID to set
4388  *              uap->mode               File mode to set (same as 'chmod')
4389  *              uap->xsecurity          ACL to set (or delete)
4390  *
4391  * Returns:     0                       Success
4392  *              !0                      errno value
4393  *
4394  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
4395  *
4396  * XXX:         We should enummerate the possible errno values here, and where
4397  *              in the code they originated.
4398  */
4399 int
4400 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
4401 {
4402         int error;
4403         struct vnode_attr va;
4404         kauth_filesec_t xsecdst;
4405
4406         AUDIT_ARG(owner, uap->uid, uap->gid);
4407
4408         VATTR_INIT(&va);
4409         if (uap->mode != -1)
4410                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4411         if (uap->uid != KAUTH_UID_NONE)
4412                 VATTR_SET(&va, va_uid, uap->uid);
4413         if (uap->gid != KAUTH_GID_NONE)
4414                 VATTR_SET(&va, va_gid, uap->gid);
4415
4416         xsecdst = NULL;
4417         switch(uap->xsecurity) {
4418                 /* explicit remove request */
4419         case CAST_USER_ADDR_T((void *)1):       /* _FILESEC_REMOVE_ACL */
4420                 VATTR_SET(&va, va_acl, NULL);
4421                 break;
4422                 /* not being set */
4423         case USER_ADDR_NULL:
4424                 break;
4425         default:
4426                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4427                         return(error);
4428                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4429                 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
4430         }
4431
4432         error = chmod1(vfs_context_current(), uap->path, &va);
4433
4434         if (xsecdst != NULL)
4435                 kauth_filesec_free(xsecdst);
4436         return(error);
4437 }
4438
4439 /*
4440  * Returns:     0                       Success
4441  *              chmod1:???              [anything chmod1 can return]
4442  */
4443 int
4444 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
4445 {
4446         struct vnode_attr va;
4447
4448         VATTR_INIT(&va);
4449         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4450
4451         return(chmod1(vfs_context_current(), uap->path, &va));
4452 }
4453
4454 /*
4455  * Change mode of a file given a file descriptor.
4456  */
4457 static int
4458 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
4459 {
4460         vnode_t vp;
4461         int error;
4462
4463         AUDIT_ARG(fd, fd);
4464
4465         if ((error = file_vnode(fd, &vp)) != 0)
4466                 return (error);
4467         if ((error = vnode_getwithref(vp)) != 0) {
4468                 file_drop(fd);
4469                 return(error);
4470         }
4471         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4472
4473         error = chmod2(vfs_context_current(), vp, vap);
4474         (void)vnode_put(vp);
4475         file_drop(fd);
4476
4477         return (error);
4478 }
4479
4480 /*
4481  * fchmod_extended: Change mode of a file given a file descriptor; with
4482  * extended argument list (including extended security (ACL)).
4483  *
4484  * Parameters:    p                       Process requesting to change file mode
4485  *                uap                     User argument descriptor (see below)
4486  *                retval                  (ignored)
4487  *
4488  * Indirect:      uap->mode               File mode to set (same as 'chmod')
4489  *                uap->uid                UID to set
4490  *                uap->gid                GID to set
4491  *                uap->xsecurity          ACL to set (or delete)
4492  *                uap->fd                 File descriptor of file to change mode
4493  *
4494  * Returns:        0                      Success
4495  *                !0                      errno value
4496  *
4497  */
4498 int
4499 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
4500 {
4501         int error;
4502         struct vnode_attr va;
4503         kauth_filesec_t xsecdst;
4504
4505         AUDIT_ARG(owner, uap->uid, uap->gid);
4506
4507         VATTR_INIT(&va);
4508         if (uap->mode != -1)
4509                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4510         if (uap->uid != KAUTH_UID_NONE)
4511                 VATTR_SET(&va, va_uid, uap->uid);
4512         if (uap->gid != KAUTH_GID_NONE)
4513                 VATTR_SET(&va, va_gid, uap->gid);
4514
4515         xsecdst = NULL;
4516         switch(uap->xsecurity) {
4517         case USER_ADDR_NULL:
4518                 VATTR_SET(&va, va_acl, NULL);
4519                 break;
4520         case CAST_USER_ADDR_T(-1):
4521                 break;
4522         default:
4523                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4524                         return(error);
4525                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4526         }
4527
4528         error = fchmod1(p, uap->fd, &va);
4529
4530
4531         switch(uap->xsecurity) {
4532         case USER_ADDR_NULL:
4533         case CAST_USER_ADDR_T(-1):
4534                 break;
4535         default:
4536                 if (xsecdst != NULL)
4537                         kauth_filesec_free(xsecdst);
4538         }
4539         return(error);
4540 }
4541
4542 int
4543 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
4544 {
4545         struct vnode_attr va;
4546
4547         VATTR_INIT(&va);
4548         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4549
4550         return(fchmod1(p, uap->fd, &va));
4551 }
4552
4553
4554 /*
4555  * Set ownership given a path name.
4556  */
4557 /* ARGSUSED */
4558 static int
4559 chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow)
4560 {
4561         vnode_t vp;
4562         struct vnode_attr va;
4563         int error;
4564         struct nameidata nd;
4565         kauth_action_t action;
4566
4567         AUDIT_ARG(owner, uap->uid, uap->gid);
4568
4569         NDINIT(&nd, LOOKUP, (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
4570                 UIO_USERSPACE, uap->path, ctx);
4571         error = namei(&nd);
4572         if (error)
4573                 return (error);
4574         vp = nd.ni_vp;
4575
4576         nameidone(&nd);
4577
4578         VATTR_INIT(&va);
4579         if (uap->uid != VNOVAL)
4580                 VATTR_SET(&va, va_uid, uap->uid);
4581         if (uap->gid != VNOVAL)
4582                 VATTR_SET(&va, va_gid, uap->gid);
4583
4584 #if CONFIG_MACF
4585         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4586         if (error)
4587                 goto out;
4588 #endif
4589
4590         /* preflight and authorize attribute changes */
4591         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4592                 goto out;
4593         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4594                 goto out;
4595         error = vnode_setattr(vp, &va, ctx);
4596
4597 out:
4598         /*
4599          * EACCES is only allowed from namei(); permissions failure should
4600          * return EPERM, so we need to translate the error code.
4601          */
4602         if (error == EACCES)
4603                 error = EPERM;
4604
4605         vnode_put(vp);
4606         return (error);
4607 }
4608
4609 int
4610 chown(__unused proc_t p, struct chown_args *uap, int32_t *retval)
4611 {
4612         return chown1(vfs_context_current(), uap, retval, 1);
4613 }
4614
4615 int
4616 lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval)
4617 {
4618         /* Argument list identical, but machine generated; cast for chown1() */
4619         return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
4620 }
4621
4622 /*
4623  * Set ownership given a file descriptor.
4624  */
4625 /* ARGSUSED */
4626 int
4627 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
4628 {
4629         struct vnode_attr va;
4630         vfs_context_t ctx = vfs_context_current();
4631         vnode_t vp;
4632         int error;
4633         kauth_action_t action;
4634
4635         AUDIT_ARG(owner, uap->uid, uap->gid);
4636         AUDIT_ARG(fd, uap->fd);
4637
4638         if ( (error = file_vnode(uap->fd, &vp)) )
4639                 return (error);
4640
4641         if ( (error = vnode_getwithref(vp)) ) {
4642                 file_drop(uap->fd);
4643                 return(error);
4644         }
4645         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4646
4647         VATTR_INIT(&va);
4648         if (uap->uid != VNOVAL)
4649                 VATTR_SET(&va, va_uid, uap->uid);
4650         if (uap->gid != VNOVAL)
4651                 VATTR_SET(&va, va_gid, uap->gid);
4652
4653 #if NAMEDSTREAMS
4654         /* chown calls are not allowed for resource forks. */
4655         if (vp->v_flag & VISNAMEDSTREAM) {
4656                 error = EPERM;
4657                 goto out;
4658         }
4659 #endif
4660
4661 #if CONFIG_MACF
4662         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4663         if (error)
4664                 goto out;
4665 #endif
4666
4667         /* preflight and authorize attribute changes */
4668         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4669                 goto out;
4670         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4671                 if (error == EACCES)
4672                         error = EPERM;
4673                 goto out;
4674         }
4675         error = vnode_setattr(vp, &va, ctx);
4676
4677 out:
4678         (void)vnode_put(vp);
4679         file_drop(uap->fd);
4680         return (error);
4681 }
4682
4683 static int
4684 getutimes(user_addr_t usrtvp, struct timespec *tsp)
4685 {
4686         int error;
4687
4688         if (usrtvp == USER_ADDR_NULL) {
4689                 struct timeval old_tv;
4690                 /* XXX Y2038 bug because of microtime argument */
4691                 microtime(&old_tv);
4692                 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
4693                 tsp[1] = tsp[0];
4694         } else {
4695                 if (IS_64BIT_PROCESS(current_proc())) {
4696                         struct user64_timeval tv[2];
4697                         error = copyin(usrtvp, (void *)tv, sizeof(tv));
4698                         if (error)
4699                                 return (error);
4700                         TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
4701                         TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
4702                 } else {
4703                         struct user32_timeval tv[2];
4704                         error = copyin(usrtvp, (void *)tv, sizeof(tv));
4705                         if (error)
4706                                 return (error);
4707                         TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
4708                         TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
4709                 }
4710         }
4711         return 0;
4712 }
4713
4714 static int
4715 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
4716         int nullflag)
4717 {
4718         int error;
4719         struct vnode_attr va;
4720         kauth_action_t action;
4721
4722         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4723
4724         VATTR_INIT(&va);
4725         VATTR_SET(&va, va_access_time, ts[0]);
4726         VATTR_SET(&va, va_modify_time, ts[1]);
4727         if (nullflag)
4728                 va.va_vaflags |= VA_UTIMES_NULL;
4729
4730 #if NAMEDSTREAMS
4731         /* utimes calls are not allowed for resource forks. */
4732         if (vp->v_flag & VISNAMEDSTREAM) {
4733                 error = EPERM;
4734                 goto out;
4735         }
4736 #endif
4737
4738 #if CONFIG_MACF
4739         error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
4740         if (error)
4741                 goto out;
4742 #endif
4743         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
4744                 if (!nullflag && error == EACCES)
4745                         error = EPERM;
4746                 goto out;
4747         }
4748
4749         /* since we may not need to auth anything, check here */
4750         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4751                 if (!nullflag && error == EACCES)
4752                         error = EPERM;
4753                 goto out;
4754         }
4755         error = vnode_setattr(vp, &va, ctx);
4756
4757 out:
4758         return error;
4759 }
4760
4761 /*
4762  * Set the access and modification times of a file.
4763  */
4764 /* ARGSUSED */
4765 int
4766 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
4767 {
4768         struct timespec ts[2];
4769         user_addr_t usrtvp;
4770         int error;
4771         struct nameidata nd;
4772         vfs_context_t ctx = vfs_context_current();
4773
4774         /*
4775          * AUDIT: Needed to change the order of operations to do the
4776          * name lookup first because auditing wants the path.
4777          */
4778         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4779                 UIO_USERSPACE, uap->path, ctx);
4780         error = namei(&nd);
4781         if (error)
4782                 return (error);
4783         nameidone(&nd);
4784
4785         /*
4786          * Fetch the user-supplied time.  If usrtvp is USER_ADDR_NULL, we fetch
4787          * the current time instead.
4788          */
4789         usrtvp = uap->tptr;
4790         if ((error = getutimes(usrtvp, ts)) != 0)
4791                 goto out;
4792
4793         error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
4794
4795 out:
4796         vnode_put(nd.ni_vp);
4797         return (error);
4798 }
4799
4800 /*
4801  * Set the access and modification times of a file.
4802  */
4803 /* ARGSUSED */
4804 int
4805 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
4806 {
4807         struct timespec ts[2];
4808         vnode_t vp;
4809         user_addr_t usrtvp;
4810         int error;
4811
4812         AUDIT_ARG(fd, uap->fd);
4813         usrtvp = uap->tptr;
4814         if ((error = getutimes(usrtvp, ts)) != 0)
4815                 return (error);
4816         if ((error = file_vnode(uap->fd, &vp)) != 0)
4817                 return (error);
4818         if((error = vnode_getwithref(vp))) {
4819                 file_drop(uap->fd);
4820                 return(error);
4821         }
4822
4823         error =  setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
4824         vnode_put(vp);
4825         file_drop(uap->fd);
4826         return(error);
4827 }
4828
4829 /*
4830  * Truncate a file given its path name.
4831  */
4832 /* ARGSUSED */
4833 int
4834 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
4835 {
4836         vnode_t vp;
4837         struct vnode_attr va;
4838         vfs_context_t ctx = vfs_context_current();
4839         int error;
4840         struct nameidata nd;
4841         kauth_action_t action;
4842
4843         if (uap->length < 0)
4844                 return(EINVAL);
4845         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4846                 UIO_USERSPACE, uap->path, ctx);
4847         if ((error = namei(&nd)))
4848                 return (error);
4849         vp = nd.ni_vp;
4850
4851         nameidone(&nd);
4852
4853         VATTR_INIT(&va);
4854         VATTR_SET(&va, va_data_size, uap->length);
4855
4856 #if CONFIG_MACF
4857         error = mac_vnode_check_truncate(ctx, NOCRED, vp);
4858         if (error)
4859                 goto out;
4860 #endif
4861
4862         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4863                 goto out;
4864         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4865                 goto out;
4866         error = vnode_setattr(vp, &va, ctx);
4867 out:
4868         vnode_put(vp);
4869         return (error);
4870 }
4871
4872 /*
4873  * Truncate a file given a file descriptor.
4874  */
4875 /* ARGSUSED */
4876 int
4877 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
4878 {
4879         vfs_context_t ctx = vfs_context_current();
4880         struct vnode_attr va;
4881         vnode_t vp;
4882         struct fileproc *fp;
4883         int error ;
4884         int fd = uap->fd;
4885
4886         AUDIT_ARG(fd, uap->fd);
4887         if (uap->length < 0)
4888                 return(EINVAL);
4889
4890         if ( (error = fp_lookup(p,fd,&fp,0)) ) {
4891                 return(error);
4892         }
4893
4894         if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
4895                 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
4896                 goto out;
4897         }
4898         if (fp->f_fglob->fg_type != DTYPE_VNODE)  {
4899                 error = EINVAL;
4900                 goto out;
4901         }
4902
4903         vp = (vnode_t)fp->f_fglob->fg_data;
4904
4905         if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
4906                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
4907                 error = EINVAL;
4908                 goto out;
4909         }
4910
4911         if ((error = vnode_getwithref(vp)) != 0) {
4912                 goto out;
4913         }
4914
4915         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4916
4917 #if CONFIG_MACF
4918         error = mac_vnode_check_truncate(ctx,
4919             fp->f_fglob->fg_cred, vp);
4920         if (error) {
4921                 (void)vnode_put(vp);
4922                 goto out;
4923         }
4924 #endif
4925         VATTR_INIT(&va);
4926         VATTR_SET(&va, va_data_size, uap->length);
4927         error = vnode_setattr(vp, &va, ctx);
4928         (void)vnode_put(vp);
4929 out:
4930         file_drop(fd);
4931         return (error);
4932 }
4933
4934
4935 /*
4936  * Sync an open file with synchronized I/O _file_ integrity completion
4937  */
4938 /* ARGSUSED */
4939 int
4940 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
4941 {
4942         __pthread_testcancel(1);
4943         return(fsync_common(p, uap, MNT_WAIT));
4944 }
4945
4946
4947 /*
4948  * Sync an open file with synchronized I/O _file_ integrity completion
4949  *
4950  * Notes:       This is a legacy support function that does not test for
4951  *              thread cancellation points.
4952  */
4953 /* ARGSUSED */
4954 int
4955 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
4956 {
4957         return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
4958 }
4959
4960
4961 /*
4962  * Sync an open file with synchronized I/O _data_ integrity completion
4963  */
4964 /* ARGSUSED */
4965 int
4966 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
4967 {
4968         __pthread_testcancel(1);
4969         return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
4970 }
4971
4972
4973 /*
4974  * fsync_common
4975  *
4976  * Common fsync code to support both synchronized I/O file integrity completion
4977  * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
4978  *
4979  * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
4980  * will only guarantee that the file data contents are retrievable.  If
4981  * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
4982  * includes additional metadata unnecessary for retrieving the file data
4983  * contents, such as atime, mtime, ctime, etc., also be committed to stable
4984  * storage.
4985  *
4986  * Parameters:  p                               The process
4987  *              uap->fd                         The descriptor to synchronize
4988  *              flags                           The data integrity flags
4989  *
4990  * Returns:     int                             Success
4991  *      fp_getfvp:EBADF                         Bad file descriptor
4992  *      fp_getfvp:ENOTSUP                       fd does not refer to a vnode
4993  *      VNOP_FSYNC:???                          unspecified
4994  *
4995  * Notes:       We use struct fsync_args because it is a short name, and all
4996  *              caller argument structures are otherwise identical.
4997  */
4998 static int
4999 fsync_common(proc_t p, struct fsync_args *uap, int flags)
5000 {
5001         vnode_t vp;
5002         struct fileproc *fp;
5003         vfs_context_t ctx = vfs_context_current();
5004         int error;
5005
5006         AUDIT_ARG(fd, uap->fd);
5007
5008         if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
5009                 return (error);
5010         if ( (error = vnode_getwithref(vp)) ) {
5011                 file_drop(uap->fd);
5012                 return(error);
5013         }
5014
5015         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5016
5017         error = VNOP_FSYNC(vp, flags, ctx);
5018
5019 #if NAMEDRSRCFORK
5020         /* Sync resource fork shadow file if necessary. */
5021         if ((error == 0) &&
5022             (vp->v_flag & VISNAMEDSTREAM) &&
5023             (vp->v_parent != NULLVP) &&
5024             vnode_isshadow(vp) &&
5025             (fp->f_flags & FP_WRITTEN)) {
5026                 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
5027         }
5028 #endif
5029
5030         (void)vnode_put(vp);
5031         file_drop(uap->fd);
5032         return (error);
5033 }
5034
5035 /*
5036  * Duplicate files.  Source must be a file, target must be a file or
5037  * must not exist.
5038  *
5039  * XXX Copyfile authorisation checking is woefully inadequate, and will not
5040  *     perform inheritance correctly.
5041  */
5042 /* ARGSUSED */
5043 int
5044 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
5045 {
5046         vnode_t tvp, fvp, tdvp, sdvp;
5047         struct nameidata fromnd, tond;
5048         int error;
5049         vfs_context_t ctx = vfs_context_current();
5050
5051         /* Check that the flags are valid. */
5052
5053         if (uap->flags & ~CPF_MASK) {
5054                 return(EINVAL);
5055         }
5056
5057         NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1,
5058                 UIO_USERSPACE, uap->from, ctx);
5059         if ((error = namei(&fromnd)))
5060                 return (error);
5061         fvp = fromnd.ni_vp;
5062
5063         NDINIT(&tond, CREATE,  LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
5064             UIO_USERSPACE, uap->to, ctx);
5065         if ((error = namei(&tond))) {
5066                 goto out1;
5067         }
5068         tdvp = tond.ni_dvp;
5069         tvp = tond.ni_vp;
5070
5071         if (tvp != NULL) {
5072                 if (!(uap->flags & CPF_OVERWRITE)) {
5073                         error = EEXIST;
5074                         goto out;
5075                 }
5076         }
5077         if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
5078                 error = EISDIR;
5079                 goto out;
5080         }
5081
5082         if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5083                 goto out;
5084
5085         if (fvp == tdvp)
5086                 error = EINVAL;
5087         /*
5088          * If source is the same as the destination (that is the
5089          * same inode number) then there is nothing to do.
5090          * (fixed to have POSIX semantics - CSM 3/2/98)
5091          */
5092         if (fvp == tvp)
5093                 error = -1;
5094         if (!error)
5095                 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
5096 out:
5097         sdvp = tond.ni_startdir;
5098         /*
5099          * nameidone has to happen before we vnode_put(tdvp)
5100          * since it may need to release the fs_nodelock on the tdvp
5101          */
5102         nameidone(&tond);
5103
5104         if (tvp)
5105                 vnode_put(tvp);
5106         vnode_put(tdvp);
5107         vnode_put(sdvp);
5108 out1:
5109         vnode_put(fvp);
5110
5111         if (fromnd.ni_startdir)
5112                 vnode_put(fromnd.ni_startdir);
5113         nameidone(&fromnd);
5114
5115         if (error == -1)
5116                 return (0);
5117         return (error);
5118 }
5119
5120
5121 /*
5122  * Rename files.  Source and destination must either both be directories,
5123  * or both not be directories.  If target is a directory, it must be empty.
      * (The emptiness requirement is not tested in this function; presumably
      * the file system's VNOP_RENAME enforces it -- TODO confirm.)
      *
      * Parameters:    p                       (unused)
      *                uap->from               User-space path of the existing name
      *                uap->to                 User-space path of the new name
      *                retval                  (unused)
      *
      * Returns:       0                       Success, including the "nothing to
      *                                        do" cases where both names already
      *                                        refer to the same vnode
      *                ENOTDIR                 Source is a directory, existing
      *                                        target is not
      *                EISDIR                  Source is not a directory, existing
      *                                        target is
      *                EINVAL                  Source is the target's parent
      *                                        directory, source is "." or "..",
      *                                        target's parent has source as its
      *                                        v_parent, or the target lookup
      *                                        reported EISDIR for a directory source
      *                EXDEV                   Source and target are on different mounts
      *                ENOENT                  Covered vnode of a mount point could
      *                                        not be referenced
      *                ENOMEM                  No path buffer for event reporting
      *                EBUSY                   vfs_busy() failed while updating a
      *                                        renamed mount point's name
      *      namei:???
      *      mac_vnode_check_rename_from:???
      *      mac_vnode_check_rename_to:???
      *      vnode_authorize:???
      *      VNOP_RENAME:???
      *      copyinstr:???
      *
      * Notes:         The whole operation is restarted from the "retry" label in
      *                three situations: after taking the per-mount rename lock
      *                for a directory that changes parent, when an authorization
      *                call reports ENOENT, and when VNOP_RENAME returns ERECYCLE.
5124  */
5125 /* ARGSUSED */
5126 int
5127 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
5128 {
5129         vnode_t tvp, tdvp;	/* target vnode (NULL if it does not exist) and its parent */
5130         vnode_t fvp, fdvp;	/* source vnode and its parent */
5131         struct nameidata fromnd, tond;
5132         vfs_context_t ctx = vfs_context_current();
5133         int error;
5134         int do_retry;		/* set when the call must be re-driven from "retry" */
5135         int mntrename;		/* TRUE when the source is a mount point being renamed */
5136         int need_event;
5137         const char *oname;
5138         char *from_name = NULL, *to_name = NULL;	/* path buffers, released at out1 */
5139         int from_len=0, to_len=0;
5140         int holding_mntlock;	/* non-zero while locked_mp is referenced and rename-locked */
5141         mount_t locked_mp = NULL;
5142         vnode_t oparent;
5143 #if CONFIG_FSE
5144         fse_info from_finfo, to_finfo;
5145 #endif
             /*
              * to_truncated has no initializer; it is only read when both path
              * buffers exist, i.e. after safe_getpath() was handed its address.
              * NOTE(review): assumes safe_getpath() always stores to it -- confirm.
              */
5146         int from_truncated=0, to_truncated;
5147
5148         holding_mntlock = 0;
5149     do_retry = 0;
5150 retry:
             /*
              * Every pass starts with no vnodes held; out1 uses the NULL-ness of
              * tdvp/fdvp to decide which lookups need to be torn down.
              */
5151         fvp = tvp = NULL;
5152         fdvp = tdvp = NULL;
5153         mntrename = FALSE;
5154
             /* look up the source name, keeping its parent directory as well */
5155         NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx);
5156
5157         if ( (error = namei(&fromnd)) )
5158                 goto out1;
5159         fdvp = fromnd.ni_dvp;
5160         fvp  = fromnd.ni_vp;
5161
5162 #if CONFIG_MACF
5163         error = mac_vnode_check_rename_from(ctx, fdvp, fvp, &fromnd.ni_cnd);
5164         if (error)
5165                 goto out1;
5166 #endif
5167
             /* look up the target name; the target itself may legitimately be absent */
5168         NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK , UIO_USERSPACE, uap->to, ctx);
5169         if (fvp->v_type == VDIR)
5170                 tond.ni_cnd.cn_flags |= WILLBEDIR;
5171
5172         if ( (error = namei(&tond)) ) {
5173                 /*
5174                  * Translate error code for rename("dir1", "dir2/.").
5175                  */
5176                 if (error == EISDIR && fvp->v_type == VDIR)
5177                         error = EINVAL;
5178                 goto out1;
5179         }
5180         tdvp = tond.ni_dvp;
5181         tvp  = tond.ni_vp;
5182
5183 #if CONFIG_MACF
5184         error = mac_vnode_check_rename_to(ctx,
5185             tdvp, tvp, fdvp == tdvp, &tond.ni_cnd);
5186         if (error)
5187                 goto out1;
5188 #endif
5189
             /* an existing target must be of the same kind (directory or not) as the source */
5190         if (tvp != NULL) {
5191                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
5192                         error = ENOTDIR;
5193                         goto out1;
5194                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
5195                         error = EISDIR;
5196                         goto out1;
5197                 }
5198         }
             /* the source may not be the directory that is to contain the new name */
5199         if (fvp == tdvp) {
5200                 error = EINVAL;
5201                 goto out1;
5202         }
5203         /*
5204          * If the source and destination are the same (i.e. they're
5205          * links to the same vnode) and the target file system is
5206          * case sensitive, then there is nothing to do.
5207          */
5208         if (fvp == tvp) {
5209                 int pathconf_val;
5210
5211                 /*
5212                  * Note: if _PC_CASE_SENSITIVE selector isn't supported,
5213                  * then assume that this file system is case sensitive.
5214                  */
5215                 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
5216                     pathconf_val != 0) {
                             /* error is 0 at this point, so this exit reports success */
5217                         goto out1;
5218                 }
5219         }
5220
5221         /*
5222          * Authorization.
5223          *
5224          * If tvp is a directory and not the same as fdvp, or tdvp is not
5225          * the same as fdvp, the node is moving between directories and we
5226          * need rights to remove from the old and add to the new.
5227          *
5228          * If tvp already exists and is not a directory, we need to be
5229          * allowed to delete it.
5230          *
5231          * Note that we do not inherit when renaming.
5232          *
5233          * XXX This needs to be revisited to implement the deferred-inherit bit
5234          */
5235         {
5236                 int moving = 0;
5237
5238                 error = 0;
5239                 if ((tvp != NULL) && vnode_isdir(tvp)) {
5240                         if (tvp != fdvp)
5241                                 moving = 1;
5242                 } else if (tdvp != fdvp) {
5243                         moving = 1;
5244                 }
5245                 /*
5246                  * must have delete rights to remove the old name even in
5247                  * the simple case of fdvp == tdvp.
5248                  *
5249                  * If fvp is a directory, and we are changing its parent,
5250                  * then we also need rights to rewrite its ".." entry as well.
5251                  */
5252                 if (vnode_isdir(fvp)) {
5253                         if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
5254                                 goto auth_exit;
5255                 } else {
5256                 if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
5257                         goto auth_exit;
5258                 }
5259                 if (moving) {
5260                         /* moving into tdvp or tvp, must have rights to add */
5261                         if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
5262                                  NULL,
5263                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
5264                                  ctx)) != 0) {
5265                 /*
5266                  * We could encounter a race where after doing the namei, tvp stops
5267                  * being valid. If so, simply re-drive the rename call from the
5268                  * top.
5269                  */
5270                  if (error == ENOENT) {
5271                      do_retry = 1;
5272                  }
5273                                 goto auth_exit;
5274                         }
5275                 } else {
5276                         /* node staying in same directory, must be allowed to add new name */
5277                         if ((error = vnode_authorize(fdvp, NULL,
5278                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5279                                 goto auth_exit;
5280                 }
5281                 /* overwriting tvp */
5282                 if ((tvp != NULL) && !vnode_isdir(tvp) &&
5283                     ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
5284             /*
5285              * We could encounter a race where after doing the namei, tvp stops
5286              * being valid. If so, simply re-drive the rename call from the
5287              * top.
5288              */
5289             if (error == ENOENT) {
5290                 do_retry = 1;
5291             }
5292                         goto auth_exit;
5293                 }
5294
5295                 /* XXX more checks? */
5296
5297 auth_exit:
5298                 /* authorization denied */
5299                 if (error != 0)
5300                         goto out1;
5301         }
5302         /*
5303          * Allow the renaming of mount points.
5304          * - target must not exist
5305          * - target must reside in the same directory as source
5306          * - union mounts cannot be renamed
5307          * - "/" cannot be renamed
5308          */
5309         if ((fvp->v_flag & VROOT) &&
5310             (fvp->v_type == VDIR) &&
5311             (tvp == NULL)  &&
5312             (fvp->v_mountedhere == NULL)  &&
5313             (fdvp == tdvp)  &&
5314             ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
5315             (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
5316                 vnode_t coveredvp;
5317
5318                 /* switch fvp to the covered vnode */
5319                 coveredvp = fvp->v_mount->mnt_vnodecovered;
5320                 if ( (vnode_getwithref(coveredvp)) ) {
5321                         error = ENOENT;
5322                         goto out1;
5323                 }
                     /* the covered vnode is now held; release the mounted root looked up above */
5324                 vnode_put(fvp);
5325
5326                 fvp = coveredvp;
5327                 mntrename = TRUE;
5328         }
5329         /*
5330          * Check for cross-device rename.
5331          */
5332         if ((fvp->v_mount != tdvp->v_mount) ||
5333             (tvp && (fvp->v_mount != tvp->v_mount))) {
5334                 error = EXDEV;
5335                 goto out1;
5336         }
5337         /*
5338          * Avoid renaming "." and "..".
5339          */
5340         if (fvp->v_type == VDIR &&
5341             ((fdvp == fvp) ||
5342              (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') ||
5343              ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) {
5344                 error = EINVAL;
5345                 goto out1;
5346         }
5347         /*
5348          * The following edge case is caught here:
5349          * (to cannot be a descendent of from)
5350          *
5351          *       o fdvp
5352          *      /
5353          *     /
5354          *    o fvp
5355          *     \
5356          *      \
5357          *       o tdvp
5358          *      /
5359          *     /
5360          *    o tvp
              *
              * Only the immediate parent link of tdvp is examined by this test.
5361          */
5362         if (tdvp->v_parent == fvp) {
5363                 error = EINVAL;
5364                 goto out1;
5365         }
5366
5367         /*
5368          * If source is the same as the destination (that is the
5369          * same inode number) then there is nothing to do...
5370          * EXCEPT if the underlying file system supports case
5371          * insensitivity and is case preserving.  In this case
5372          * the file system needs to handle the special case of
5373          * getting the same vnode as target (fvp) and source (tvp).
5374          *
5375          * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
5376          * and _PC_CASE_PRESERVING can have this exception, and they need to
5377          * handle the special case of getting the same vnode as target and
5378          * source.  NOTE: Then the target is unlocked going into vnop_rename,
5379          * so not to cause locking problems. There is a single reference on tvp.
5380          *
5381          * NOTE - that fvp == tvp also occurs if they are hard linked and
5382          * that correct behaviour then is just to return success without doing
5383          * anything.
5384          */
5385         if (fvp == tvp && fdvp == tdvp) {
                     /* byte-for-byte identical component names: leave with error == 0 */
5386                 if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
5387                     !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
5388                           fromnd.ni_cnd.cn_namelen)) {
5389                         goto out1;
5390                 }
5391         }
5392
5393         if (holding_mntlock && fvp->v_mount != locked_mp) {
5394                 /*
5395                  * we're holding a reference and lock
5396                  * on locked_mp, but it no longer matches
5397                  * what we want to do... so drop our hold
5398                  */
5399                 mount_unlock_renames(locked_mp);
5400                 mount_drop(locked_mp, 0);
5401                 holding_mntlock = 0;
5402         }
5403         if (tdvp != fdvp && fvp->v_type == VDIR) {
5404                 /*
5405                  * serialize renames that re-shape
5406                  * the tree... if holding_mntlock is
5407                  * set, then we're ready to go...
5408                  * otherwise we
5409                  * first need to drop the iocounts
5410                  * we picked up, second take the
5411                  * lock to serialize the access,
5412                  * then finally start the lookup
5413                  * process over with the lock held
5414                  */
5415                 if (!holding_mntlock) {
5416                         /*
5417                          * need to grab a reference on
5418                          * the mount point before we
5419                          * drop all the iocounts... once
5420                          * the iocounts are gone, the mount
5421                          * could follow
5422                          */
5423                         locked_mp = fvp->v_mount;
5424                         mount_ref(locked_mp, 0);
5425
5426                         /*
5427                          * nameidone has to happen before we vnode_put(tvp)
5428                          * since it may need to release the fs_nodelock on the tvp
5429                          */
5430                         nameidone(&tond);
5431
5432                         if (tvp)
5433                                 vnode_put(tvp);
5434                         vnode_put(tdvp);
5435
5436                         /*
5437                          * nameidone has to happen before we vnode_put(fdvp)
5438                          * since it may need to release the fs_nodelock on the fvp
5439                          */
5440                         nameidone(&fromnd);
5441
5442                         vnode_put(fvp);
5443                         vnode_put(fdvp);
5444
5445                         mount_lock_renames(locked_mp);
5446                         holding_mntlock = 1;
5447
5448                         goto retry;
5449                 }
5450         } else {
5451                 /*
5452                  * when we dropped the iocounts to take
5453                  * the lock, we allowed the identity of
5454                  * the various vnodes to change... if they did,
5455                  * we may no longer be dealing with a rename
5456                  * that reshapes the tree... once we're holding
5457                  * the iocounts, the vnodes can't change type
5458                  * so we're free to drop the lock at this point
5459                  * and continue on
5460                  */
5461                 if (holding_mntlock) {
5462                         mount_unlock_renames(locked_mp);
5463                         mount_drop(locked_mp, 0);
5464                         holding_mntlock = 0;
5465                 }
5466         }
5467         // save these off so we can later verify that fvp is the same
5468         oname   = fvp->v_name;
5469         oparent = fvp->v_parent;
5470
5471 #if CONFIG_FSE
5472         need_event = need_fsevent(FSE_RENAME, fvp);
5473         if (need_event) {
5474                 get_fse_info(fvp, &from_finfo, ctx);
5475
5476                 if (tvp) {
5477                         get_fse_info(tvp, &to_finfo, ctx);
5478                 }
5479         }
5480 #else
5481         need_event = 0;
5482 #endif /* CONFIG_FSE */
5483
             /*
              * Full paths are only built when something will consume them: an
              * fsevent or a kauth fileop listener.  Otherwise from_name and
              * to_name stay NULL.
              */
5484         if (need_event || kauth_authorize_fileop_has_listeners()) {
5485                 GET_PATH(from_name);
5486                 if (from_name == NULL) {
5487                         error = ENOMEM;
5488                         goto out1;
5489                 }
5490
5491                 from_len = safe_getpath(fdvp, fromnd.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
5492
5493                 GET_PATH(to_name);
5494                 if (to_name == NULL) {
                             /* from_name is released at out1 */
5495                         error = ENOMEM;
5496                         goto out1;
5497                 }
5498
5499                 to_len = safe_getpath(tdvp, tond.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
5500         }
5501
5502         error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd,
5503                             tdvp, tvp, &tond.ni_cnd,
5504                             ctx);
5505
5506         if (holding_mntlock) {
5507                 /*
5508                  * we can drop our serialization
5509                  * lock now
5510                  */
5511                 mount_unlock_renames(locked_mp);
5512                 mount_drop(locked_mp, 0);
5513                 holding_mntlock = 0;
5514         }
5515         if (error) {
5516         /*
5517          * We may encounter a race in the VNOP where the destination didn't
5518          * exist when we did the namei, but it does by the time we go and
5519          * try to create the entry. In this case, we should re-drive this rename
5520          * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
5521                  * but other filesystems susceptible to this race could return it, too.
5522          */
5523         if (error == ERECYCLE) {
5524             do_retry = 1;
5525         }
5526
5527                 goto out1;
5528         }
5529
5530         /* call out to allow 3rd party notification of rename.
5531          * Ignore result of kauth_authorize_fileop call.
5532          */
5533         kauth_authorize_fileop(vfs_context_ucred(ctx),
5534                         KAUTH_FILEOP_RENAME,
5535                         (uintptr_t)from_name, (uintptr_t)to_name);
5536
5537 #if CONFIG_FSE
5538         if (from_name != NULL && to_name != NULL) {
5539                 if (from_truncated || to_truncated) {
5540                         // set it here since only the from_finfo gets reported up to user space
5541                         from_finfo.mode |= FSE_TRUNCATED_PATH;
5542                 }
                     /* the event carries the target's finfo only when a target existed */
5543                 if (tvp) {
5544                         add_fsevent(FSE_RENAME, ctx,
5545                                     FSE_ARG_STRING, from_len, from_name,
5546                                     FSE_ARG_FINFO, &from_finfo,
5547                                     FSE_ARG_STRING, to_len, to_name,
5548                                     FSE_ARG_FINFO, &to_finfo,
5549                                     FSE_ARG_DONE);
5550                 } else {
5551                         add_fsevent(FSE_RENAME, ctx,
5552                                     FSE_ARG_STRING, from_len, from_name,
5553                                     FSE_ARG_FINFO, &from_finfo,
5554                                     FSE_ARG_STRING, to_len, to_name,
5555                                     FSE_ARG_DONE);
5556                 }
5557         }
5558 #endif /* CONFIG_FSE */
5559
5560         /*
5561          * update filesystem's mount point data
              *
              * The last path component stored in f_mntonname is replaced by the
              * last component of the user-supplied "to" path.
              *
              * NOTE(review): VNOP_RENAME has already succeeded here, yet a
              * vfs_busy() failure returns EBUSY and skips the identity fix-up
              * below, and a copyinstr() failure is returned to the caller as
              * well -- confirm this is intended.
5562          */
5563         if (mntrename) {
5564                 char *cp, *pathend, *mpname;
5565                 char * tobuf;
5566                 struct mount *mp;
5567                 int maxlen;
5568                 size_t len = 0;
5569
5570                 mp = fvp->v_mountedhere;
5571
5572                 if (vfs_busy(mp, LK_NOWAIT)) {
5573                         error = EBUSY;
5574                         goto out1;
5575                 }
5576                 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
5577
5578                 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
5579                 if (!error) {
5580                         /* find current mount point prefix */
5581                         pathend = &mp->mnt_vfsstat.f_mntonname[0];
5582                         for (cp = pathend; *cp != '\0'; ++cp) {
5583                                 if (*cp == '/')
5584                                         pathend = cp + 1;
5585                         }
5586                         /* find last component of target name */
5587                         for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
5588                                 if (*cp == '/')
5589                                         mpname = cp + 1;
5590                         }
5591                         /* append name to prefix */
5592                         maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
5593                         bzero(pathend, maxlen);
5594                         strlcpy(pathend, mpname, maxlen);
5595                 }
5596                 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
5597
5598                 vfs_unbusy(mp);
5599         }
5600         /*
5601          * fix up name & parent pointers.  note that we first
5602          * check that fvp has the same name/parent pointers it
5603          * had before the rename call... this is a 'weak' check
5604          * at best...
5605          */
5606         if (oname == fvp->v_name && oparent == fvp->v_parent) {
5607                 int update_flags;
5608
5609                 update_flags = VNODE_UPDATE_NAME;
5610
5611                 if (fdvp != tdvp)
5612                         update_flags |= VNODE_UPDATE_PARENT;
5613
5614                 vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
5615         }
     /*
      * Common exit: release path buffers, the rename lock and mount reference
      * if still held, then tear down whichever lookups completed.
      */
5616 out1:
5617         if (to_name != NULL) {
5618                 RELEASE_PATH(to_name);
5619                 to_name = NULL;
5620         }
5621         if (from_name != NULL) {
5622                 RELEASE_PATH(from_name);
5623                 from_name = NULL;
5624         }
5625         if (holding_mntlock) {
5626                 mount_unlock_renames(locked_mp);
5627                 mount_drop(locked_mp, 0);
5628                 holding_mntlock = 0;
5629         }
5630         if (tdvp) {
5631                 /*
5632                  * nameidone has to happen before we vnode_put(tdvp)
5633                  * since it may need to release the fs_nodelock on the tdvp
5634                  */
5635                 nameidone(&tond);
5636
5637                 if (tvp)
5638                         vnode_put(tvp);
5639                 vnode_put(tdvp);
5640         }
5641         if (fdvp) {
5642                 /*
5643                  * nameidone has to happen before we vnode_put(fdvp)
5644                  * since it may need to release the fs_nodelock on the fdvp
5645                  */
5646                 nameidone(&fromnd);
5647
5648                 if (fvp)
5649                         vnode_put(fvp);
5650                 vnode_put(fdvp);
5651         }
5652
5653     /*
5654      * If things changed after we did the namei, then we will re-drive
5655      * this rename call from the top.
          *
          * NOTE(review): no limit on the number of re-drives is visible in
          * this function.
5656      */
5657         if(do_retry) {
5658         do_retry = 0;
5659                 goto retry;
5660         }
5661
5662         return (error);
5663 }
5664
5665 /*
5666  * Make a directory file.
      *
      * Common worker for mkdir() and mkdir_extended(): looks up the parent of
      * the requested name, refuses if the name already exists, authorizes the
      * addition and creates the directory through vn_create().
      *
      * Parameters:    ctx                     VFS context used for the lookup,
      *                                        authorization and creation
      *                path                    User-space path of the directory
      *                                        to create
      *                vap                     Attributes for the new directory;
      *                                        va_type is set to VDIR here
5667  *
5668  * Returns:     0                       Success
5669  *              EEXIST
5670  *      namei:???
      *      mac_vnode_check_create:???
5671  *      vnode_authorize:???
5672  *      vn_create:???
5673  */
5674 /* ARGSUSED */
5675 static int
5676 mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
5677 {
5678         vnode_t vp, dvp;	/* new directory (NULL until created) and its parent */
5679         int error;
5680         int update_flags = 0;
5681         struct nameidata nd;
5682
5683         AUDIT_ARG(mode, vap->va_mode);
5684         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
5685                 UIO_USERSPACE, path, ctx);
5686         nd.ni_cnd.cn_flags |= WILLBEDIR;
5687         error = namei(&nd);
5688         if (error)
5689                 return (error);
5690         dvp = nd.ni_dvp;
5691         vp = nd.ni_vp;
5692
             /* the lookup found an existing entry with this name */
5693         if (vp != NULL) {
5694                 error = EEXIST;
5695                 goto out;
5696         }
5697
5698         VATTR_SET(vap, va_type, VDIR);
5699
5700 #if CONFIG_MACF
5701         error = mac_vnode_check_create(ctx,
5702             nd.ni_dvp, &nd.ni_cnd, vap);
5703         if (error)
5704                 goto out;
5705 #endif
5706
5707         /* authorize addition of a directory to the parent */
5708         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
5709                 goto out;
5710
5711
5712         /* make the directory */
5713         if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0)
5714                 goto out;
5715
5716         // Make sure the name & parent pointers are hooked up
5717         if (vp->v_name == NULL)
5718                 update_flags |= VNODE_UPDATE_NAME;
5719         if (vp->v_parent == NULLVP)
5720                 update_flags |= VNODE_UPDATE_PARENT;
5721
5722         if (update_flags)
5723                 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
5724
5725 #if CONFIG_FSE
5726         add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
5727 #endif
5728
5729 out:
5730         /*
5731          * nameidone has to happen before we vnode_put(dvp)
5732          * since it may need to release the fs_nodelock on the dvp
5733          */
5734         nameidone(&nd);
5735
             /* vp is either the pre-existing entry (EEXIST) or the new directory */
5736         if (vp)
5737                 vnode_put(vp);
5738         vnode_put(dvp);
5739
5740         return (error);
5741 }
5742
5743 /*
5744  * mkdir_extended: Create a directory; with extended security (ACL).
5745  *
5746  * Parameters:    p                       Process requesting to create the directory
5747  *                uap                     User argument descriptor (see below)
5748  *                retval                  (ignored)
5749  *
5750  * Indirect:      uap->path               Path of directory to create
5751  *                uap->mode               Access permissions to set
5752  *                uap->xsecurity          ACL to set
      *                uap->uid, uap->gid      Passed to AUDIT_ARG only; not applied
      *                                        to the new directory by this function
5753  *
5754  * Returns:        0                      Success
5755  *                !0                      Not success
      *                                        (error from kauth_copyinfilesec()
      *                                        or from mkdir1())
5756  *
5757  */
5758 int
5759 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
5760 {
5761         int ciferror;
5762         kauth_filesec_t xsecdst;
5763         struct vnode_attr va;
5764
5765         AUDIT_ARG(owner, uap->uid, uap->gid);
5766
             /* copy in the caller's filesec, if one was supplied */
5767         xsecdst = NULL;
5768         if ((uap->xsecurity != USER_ADDR_NULL) &&
5769             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
5770                 return ciferror;
5771
             /* requested mode, limited to ACCESSPERMS and with the process's fd_cmask bits cleared */
5772         VATTR_INIT(&va);
5773         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5774         if (xsecdst != NULL)
5775                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5776
             /* va_acl points into xsecdst, so the filesec is only freed after mkdir1() returns */
5777         ciferror = mkdir1(vfs_context_current(), uap->path, &va);
5778         if (xsecdst != NULL)
5779                 kauth_filesec_free(xsecdst);
5780         return ciferror;
5781 }
5782
     /*
      * mkdir: Create a directory.
      *
      * Parameters:    p                       Process requesting to create the directory
      *                uap                     User argument descriptor (see below)
      *                retval                  (ignored)
      *
      * Indirect:      uap->path               Path of directory to create
      *                uap->mode               Access permissions to set
      *
      * Returns:        0                      Success
      *                !0                      Error returned by mkdir1()
      */
5783 int
5784 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
5785 {
5786         struct vnode_attr va;
5787
             /* requested mode, limited to ACCESSPERMS and with the process's fd_cmask bits cleared */
5788         VATTR_INIT(&va);
5789         VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5790
5791         return(mkdir1(vfs_context_current(), uap->path, &va));
5792 }
5793
5794 /*
5795  * Remove a directory file.
5796  */
5797 /* ARGSUSED */
5798 int
5799 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
5800 {
5801         vnode_t vp, dvp;
5802         int error;
5803         struct nameidata nd;
5804         vfs_context_t ctx = vfs_context_current();
5805
5806         int restart_flag;
5807         uint32_t oldvp_id = UINT32_MAX;
5808
5809         /*
5810          * This loop exists to restart rmdir in the unlikely case that two
5811          * processes are simultaneously trying to remove the same directory
5812          * containing orphaned appleDouble files.
5813          */
5814         do {
5815                 restart_flag = 0;
5816
5817                 NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1,
5818                                 UIO_USERSPACE, uap->path, ctx);
5819                 error = namei(&nd);
5820                 if (error)
5821                         return (error);
5822
5823                 dvp = nd.ni_dvp;
5824                 vp = nd.ni_vp;
5825
5826
5827                 /*
5828                  * If being restarted check if the new vp
5829                  * still has the same v_id.
5830                  */
5831                 if (oldvp_id != UINT32_MAX && oldvp_id != vp->v_id) {
5832                         error = ENOENT;
5833                         goto out;
5834                 }
5835
5836                 if (vp->v_type != VDIR) {
5837                         /*
5838                          * rmdir only deals with directories
5839                          */
5840                         error = ENOTDIR;
5841                 } else if (dvp == vp) {
5842                         /*
5843                          * No rmdir "." please.
5844                          */
5845                         error = EINVAL;
5846                 } else if (vp->v_flag & VROOT) {
5847                         /*
5848                          * The root of a mounted filesystem cannot be deleted.
5849                          */
5850                         error = EBUSY;
5851                 } else {
5852 #if CONFIG_MACF
5853                         error = mac_vnode_check_unlink(ctx, dvp,
5854                                         vp, &nd.ni_cnd);
5855                         if (!error)
5856 #endif
5857                                 error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, ctx);
5858                 }
5859                 if (!error) {
5860                         char     *path = NULL;
5861                         int       len=0;
5862                         int has_listeners = 0;
5863                         int need_event = 0;
5864                         int truncated = 0;
5865 #if CONFIG_FSE
5866                         fse_info  finfo;
5867
5868                         need_event = need_fsevent(FSE_DELETE, dvp);
5869                         if (need_event) {
5870                                 get_fse_info(vp, &finfo, ctx);
5871                         }
5872 #endif
5873                         has_listeners = kauth_authorize_fileop_has_listeners();
5874                         if (need_event || has_listeners) {
5875                                 GET_PATH(path);
5876                                 if (path == NULL) {
5877                                         error = ENOMEM;
5878                                         goto out;
5879                                 }
5880
5881                                 len = safe_getpath(vp, NULL, path, MAXPATHLEN, &truncated);
5882 #if CONFIG_FSE
5883                                 if (truncated) {
5884                                         finfo.mode |= FSE_TRUNCATED_PATH;
5885                                 }
5886 #endif
5887                         }
5888
5889                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5890
5891                         /*
5892                          * Special case to remove orphaned AppleDouble
5893                          * files. I don't like putting this in the kernel,
5894                          * but carbon does not like putting this in carbon either,
5895                          * so here we are.
5896                          */
5897                         if (error == ENOTEMPTY) {
5898                                 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
5899                                 if (error == EBUSY) {
5900                                         oldvp_id = vp->v_id;
5901                                         goto out;
5902                                 }
5903
5904
5905                                 /*
5906                                  * Assuming everything went well, we will try the RMDIR again
5907                                  */
5908                                 if (!error)
5909                                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5910                         }
5911
5912                         /*
5913                          * Call out to allow 3rd party notification of delete.
5914                          * Ignore result of kauth_authorize_fileop call.
5915                          */
5916                         if (!error) {
5917                                 if (has_listeners) {
5918                                         kauth_authorize_fileop(vfs_context_ucred(ctx),
5919                                                         KAUTH_FILEOP_DELETE,
5920                                                         (uintptr_t)vp,
5921                                                         (uintptr_t)path);
5922                                 }
5923
5924                                 if (vp->v_flag & VISHARDLINK) {
5925                                     // see the comment in unlink1() about why we update
5926                                     // the parent of a hard link when it is removed
5927                                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
5928                                 }
5929
5930 #if CONFIG_FSE
5931                                 if (need_event) {
5932                                         add_fsevent(FSE_DELETE, ctx,
5933                                                         FSE_ARG_STRING, len, path,
5934                                                         FSE_ARG_FINFO, &finfo,
5935                                                         FSE_ARG_DONE);
5936                                 }
5937 #endif
5938                         }
5939                         if (path != NULL)
5940                                 RELEASE_PATH(path);
5941                 }
5942
5943 out:
5944                 /*
5945                  * nameidone has to happen before we vnode_put(dvp)
5946                  * since it may need to release the fs_nodelock on the dvp
5947                  */
5948                 nameidone(&nd);
5949
5950                 vnode_put(dvp);
5951                 vnode_put(vp);
5952
5953                 if (restart_flag == 0) {
5954                         wakeup_one((caddr_t)vp);
5955                         return (error);
5956                 }
5957                 tsleep(vp, PVFS, "rm AD", 1);
5958
5959         } while (restart_flag != 0);
5960
5961         return (error);
5962
5963 }
5964
5965 /* Get direntry length padded to 8 byte alignment */
5966 #define DIRENT64_LEN(namlen) \
5967         ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
5968
5969 static errno_t
5970 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
5971                 int *numdirent, vfs_context_t ctxp)
5972 {
5973         /* Check if fs natively supports VNODE_READDIR_EXTENDED */
5974         if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) {
5975                 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
5976         } else {
5977                 size_t bufsize;
5978                 void * bufptr;
5979                 uio_t auio;
5980                 struct direntry entry64;
5981                 struct dirent *dep;
5982                 int bytesread;
5983                 int error;
5984
5985                 /*
5986                  * Our kernel buffer needs to be smaller since re-packing
5987                  * will expand each dirent.  The worse case (when the name
5988                  * length is 3) corresponds to a struct direntry size of 32
5989                  * bytes (8-byte aligned) and a struct dirent size of 12 bytes
5990                  * (4-byte aligned).  So having a buffer that is 3/8 the size
5991                  * will prevent us from reading more than we can pack.
5992                  *
5993                  * Since this buffer is wired memory, we will limit the
5994                  * buffer size to a maximum of 32K. We would really like to
5995                  * use 32K in the MIN(), but we use magic number 87371 to
5996                  * prevent uio_resid() * 3 / 8 from overflowing.
5997                  */
5998                 bufsize = 3 * MIN(uio_resid(uio), 87371) / 8;
5999                 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
6000                 if (bufptr == NULL) {
6001                         return ENOMEM;
6002                 }
6003
6004                 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
6005                 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
6006                 auio->uio_offset = uio->uio_offset;
6007
6008                 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
6009
6010                 dep = (struct dirent *)bufptr;
6011                 bytesread = bufsize - uio_resid(auio);
6012
6013                 /*
6014                  * Convert all the entries and copy them out to user's buffer.
6015                  */
6016                 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
6017                         /* Convert a dirent to a dirent64. */
6018                         entry64.d_ino = dep->d_ino;
6019                         entry64.d_seekoff = 0;
6020                         entry64.d_reclen = DIRENT64_LEN(dep->d_namlen);
6021                         entry64.d_namlen = dep->d_namlen;
6022                         entry64.d_type = dep->d_type;
6023                         bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1);
6024
6025                         /* Move to next entry. */
6026                         dep = (struct dirent *)((char *)dep + dep->d_reclen);
6027
6028                         /* Copy entry64 to user's buffer. */
6029                         error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio);
6030                 }
6031
6032                 /* Update the real offset using the offset we got from VNOP_READDIR. */
6033                 if (error == 0) {
6034                         uio->uio_offset = auio->uio_offset;
6035                 }
6036                 uio_free(auio);
6037                 FREE(bufptr, M_TEMP);
6038                 return (error);
6039         }
6040 }
6041
6042 /*
6043  * Read a block of directory entries in a file system independent format.
6044  */
6045 static int
6046 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
6047                      off_t *offset, int flags)
6048 {
6049         vnode_t vp;
6050         struct vfs_context context = *vfs_context_current();    /* local copy */
6051         struct fileproc *fp;
6052         uio_t auio;
6053         int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6054         off_t loff;
6055         int error, eofflag, numdirent;
6056         char uio_buf[ UIO_SIZEOF(1) ];
6057
6058         error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
6059         if (error) {
6060                 return (error);
6061         }
6062         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6063                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6064                 error = EBADF;
6065                 goto out;
6066         }
6067
6068 #if CONFIG_MACF
6069         error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
6070         if (error)
6071                 goto out;
6072 #endif
6073         if ( (error = vnode_getwithref(vp)) ) {
6074                 goto out;
6075         }
6076         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6077
6078 unionread:
6079         if (vp->v_type != VDIR) {
6080                 (void)vnode_put(vp);
6081                 error = EINVAL;
6082                 goto out;
6083         }
6084
6085 #if CONFIG_MACF
6086         error = mac_vnode_check_readdir(&context, vp);
6087         if (error != 0) {
6088                 (void)vnode_put(vp);
6089                 goto out;
6090         }
6091 #endif /* MAC */
6092
6093         loff = fp->f_fglob->fg_offset;
6094         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
6095         uio_addiov(auio, bufp, bufsize);
6096
6097         if (flags & VNODE_READDIR_EXTENDED) {
6098                 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
6099                 fp->f_fglob->fg_offset = uio_offset(auio);
6100         } else {
6101                 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
6102                 fp->f_fglob->fg_offset = uio_offset(auio);
6103         }
6104         if (error) {
6105                 (void)vnode_put(vp);
6106                 goto out;
6107         }
6108
6109         if ((user_ssize_t)bufsize == uio_resid(auio)){
6110                 if (union_dircheckp) {
6111                         error = union_dircheckp(&vp, fp, &context);
6112                         if (error == -1)
6113                                 goto unionread;
6114                         if (error)
6115                                 goto out;
6116                 }
6117
6118                 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) {
6119                         struct vnode *tvp = vp;
6120                         vp = vp->v_mount->mnt_vnodecovered;
6121                         vnode_getwithref(vp);
6122                         vnode_ref(vp);
6123                         fp->f_fglob->fg_data = (caddr_t) vp;
6124                         fp->f_fglob->fg_offset = 0;
6125                         vnode_rele(tvp);
6126                         vnode_put(tvp);
6127                         goto unionread;
6128                 }
6129         }
6130
6131         vnode_put(vp);
6132         if (offset) {
6133                 *offset = loff;
6134         }
6135
6136         *bytesread = bufsize - uio_resid(auio);
6137 out:
6138         file_drop(fd);
6139         return (error);
6140 }
6141
6142
6143 int
6144 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
6145 {
6146         off_t offset;
6147         ssize_t bytesread;
6148         int error;
6149
6150         AUDIT_ARG(fd, uap->fd);
6151         error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
6152
6153         if (error == 0) {
6154                 if (proc_is64bit(p)) {
6155                         user64_long_t base = (user64_long_t)offset;
6156                         error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
6157                 } else {
6158                         user32_long_t base = (user32_long_t)offset;
6159                         error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
6160                 }
6161                 *retval = bytesread;
6162         }
6163         return (error);
6164 }
6165
6166 int
6167 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
6168 {
6169         off_t offset;
6170         ssize_t bytesread;
6171         int error;
6172
6173         AUDIT_ARG(fd, uap->fd);
6174         error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
6175
6176         if (error == 0) {
6177                 *retval = bytesread;
6178                 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
6179         }
6180         return (error);
6181 }
6182
6183
6184 /*
6185  * Set the mode mask for creation of filesystem nodes.
6186  * XXX implement xsecurity
6187  */
6188 #define UMASK_NOXSECURITY        (void *)1      /* leave existing xsecurity alone */
6189 static int
6190 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
6191 {
6192         struct filedesc *fdp;
6193
6194         AUDIT_ARG(mask, newmask);
6195         proc_fdlock(p);
6196         fdp = p->p_fd;
6197         *retval = fdp->fd_cmask;
6198         fdp->fd_cmask = newmask & ALLPERMS;
6199         proc_fdunlock(p);
6200         return (0);
6201 }
6202
6203 /*
6204  * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
6205  *
6206  * Parameters:    p                       Process requesting to set the umask
6207  *                uap                     User argument descriptor (see below)
6208  *                retval                  umask of the process (parameter p)
6209  *
6210  * Indirect:      uap->newmask            umask to set
6211  *                uap->xsecurity          ACL to set
6212  *
6213  * Returns:        0                      Success
6214  *                !0                      Not success
6215  *
6216  */
6217 int
6218 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
6219 {
6220         int ciferror;
6221         kauth_filesec_t xsecdst;
6222
6223         xsecdst = KAUTH_FILESEC_NONE;
6224         if (uap->xsecurity != USER_ADDR_NULL) {
6225                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6226                         return ciferror;
6227         } else {
6228                 xsecdst = KAUTH_FILESEC_NONE;
6229         }
6230
6231         ciferror = umask1(p, uap->newmask, xsecdst, retval);
6232
6233         if (xsecdst != KAUTH_FILESEC_NONE)
6234                 kauth_filesec_free(xsecdst);
6235         return ciferror;
6236 }
6237
6238 int
6239 umask(proc_t p, struct umask_args *uap, int32_t *retval)
6240 {
6241         return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
6242 }
6243
6244 /*
6245  * Void all references to file by ripping underlying filesystem
6246  * away from vnode.
6247  */
6248 /* ARGSUSED */
6249 int
6250 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
6251 {
6252         vnode_t vp;
6253         struct vnode_attr va;
6254         vfs_context_t ctx = vfs_context_current();
6255         int error;
6256         struct nameidata nd;
6257
6258         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
6259                 UIO_USERSPACE, uap->path, ctx);
6260         error = namei(&nd);
6261         if (error)
6262                 return (error);
6263         vp = nd.ni_vp;
6264
6265         nameidone(&nd);
6266
6267         if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
6268                 error = ENOTSUP;
6269                 goto out;
6270         }
6271
6272         if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
6273                 error = EBUSY;
6274                 goto out;
6275         }
6276
6277 #if CONFIG_MACF
6278         error = mac_vnode_check_revoke(ctx, vp);
6279         if (error)
6280                 goto out;
6281 #endif
6282
6283         VATTR_INIT(&va);
6284         VATTR_WANTED(&va, va_uid);
6285         if ((error = vnode_getattr(vp, &va, ctx)))
6286                 goto out;
6287         if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
6288             (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
6289                 goto out;
6290         if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
6291                 VNOP_REVOKE(vp, REVOKEALL, ctx);
6292 out:
6293         vnode_put(vp);
6294         return (error);
6295 }
6296
6297
/*
 *  HFS/HFS PLUS SPECIFIC SYSTEM CALLS
 *  The following system calls are designed to support features
 *  which are specific to the HFS & HFS Plus volume formats.
 */
6303
6304 #ifdef __APPLE_API_OBSOLETE
6305
6306 /************************************************/
6307 /* *** Following calls will be deleted soon *** */
6308 /************************************************/
6309
6310 /*
6311  * Make a complex file.  A complex file is one with multiple forks (data streams)
6312  */
6313 /* ARGSUSED */
6314 int
6315 mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused int32_t *retval)
6316 {
6317         return (ENOTSUP);
6318 }
6319
6320 /*
6321  * Extended stat call which returns volumeid and vnodeid as well as other info
6322  */
6323 /* ARGSUSED */
6324 int
6325 statv(__unused proc_t p,
6326           __unused struct statv_args *uap,
6327           __unused int32_t *retval)
6328 {
6329         return (ENOTSUP);       /*  We'll just return an error for now */
6330
6331 } /* end of statv system call */
6332
6333 /*
6334 * Extended lstat call which returns volumeid and vnodeid as well as other info
6335 */
6336 /* ARGSUSED */
6337 int
6338 lstatv(__unused proc_t p,
6339            __unused struct lstatv_args *uap,
6340            __unused int32_t *retval)
6341 {
6342        return (ENOTSUP);        /*  We'll just return an error for now */
6343 } /* end of lstatv system call */
6344
6345 /*
6346 * Extended fstat call which returns volumeid and vnodeid as well as other info
6347 */
6348 /* ARGSUSED */
6349 int
6350 fstatv(__unused proc_t p,
6351            __unused struct fstatv_args *uap,
6352            __unused int32_t *retval)
6353 {
6354        return (ENOTSUP);        /*  We'll just return an error for now */
6355 } /* end of fstatv system call */
6356
6357
6358 /************************************************/
6359 /* *** Preceding calls will be deleted soon *** */
6360 /************************************************/
6361
6362 #endif /* __APPLE_API_OBSOLETE */
6363
6364 /*
6365 * Obtain attribute information on objects in a directory while enumerating
6366 * the directory.  This call does not yet support union mounted directories.
6367 * TO DO
6368 *  1.union mounted directories.
6369 */
6370
6371 /* ARGSUSED */
6372 int
6373 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
6374 {
6375         vnode_t vp;
6376         struct fileproc *fp;
6377         uio_t auio = NULL;
6378         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6379         uint32_t count;
6380         uint32_t newstate;
6381         int error, eofflag;
6382         uint32_t loff;
6383         struct attrlist attributelist;
6384         vfs_context_t ctx = vfs_context_current();
6385         int fd = uap->fd;
6386         char uio_buf[ UIO_SIZEOF(1) ];
6387         kauth_action_t action;
6388
6389         AUDIT_ARG(fd, fd);
6390
6391         /* Get the attributes into kernel space */
6392         if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
6393                 return(error);
6394         }
6395         if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
6396                 return(error);
6397         }
6398         if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
6399                 return (error);
6400         }
6401         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6402                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6403                 error = EBADF;
6404                 goto out;
6405         }
6406
6407
6408 #if CONFIG_MACF
6409         error = mac_file_check_change_offset(vfs_context_ucred(ctx),
6410             fp->f_fglob);
6411         if (error)
6412                 goto out;
6413 #endif
6414
6415
6416         if ( (error = vnode_getwithref(vp)) )
6417                 goto out;
6418
6419         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6420
6421         if (vp->v_type != VDIR) {
6422                 (void)vnode_put(vp);
6423                 error = EINVAL;
6424                 goto out;
6425         }
6426
6427 #if CONFIG_MACF
6428         error = mac_vnode_check_readdir(ctx, vp);
6429         if (error != 0) {
6430                 (void)vnode_put(vp);
6431                 goto out;
6432         }
6433 #endif /* MAC */
6434
6435         /* set up the uio structure which will contain the users return buffer */
6436         loff = fp->f_fglob->fg_offset;
6437         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ,
6438             &uio_buf[0], sizeof(uio_buf));
6439         uio_addiov(auio, uap->buffer, uap->buffersize);
6440
6441         /*
6442          * If the only item requested is file names, we can let that past with
6443          * just LIST_DIRECTORY.  If they want any other attributes, that means
6444          * they need SEARCH as well.
6445          */
6446         action = KAUTH_VNODE_LIST_DIRECTORY;
6447         if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
6448             attributelist.fileattr || attributelist.dirattr)
6449                 action |= KAUTH_VNODE_SEARCH;
6450
6451         if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
6452
6453                 /* Believe it or not, uap->options only has 32-bits of valid
6454                  * info, so truncate before extending again */
6455                 error = VNOP_READDIRATTR(vp, &attributelist, auio,
6456                                          count,
6457                                          (u_long)(uint32_t)uap->options, &newstate, &eofflag,
6458                                          &count, ctx);
6459         }
6460         (void)vnode_put(vp);
6461
6462         if (error)
6463                 goto out;
6464         fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
6465
6466         if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
6467                 goto out;
6468         if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
6469                 goto out;
6470         if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
6471                 goto out;
6472
6473         *retval = eofflag;  /* similar to getdirentries */
6474         error = 0;
6475 out:
6476         file_drop(fd);
6477         return (error); /* return error earlier, an retval of 0 or 1 now */
6478
6479 } /* end of getdirentryattr system call */
6480
6481 /*
6482 * Exchange data between two files
6483 */
6484
6485 /* ARGSUSED */
6486 int
6487 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
6488 {
6489
6490         struct nameidata fnd, snd;
6491         vfs_context_t ctx = vfs_context_current();
6492         vnode_t fvp;
6493         vnode_t svp;
6494         int error;
6495         u_int32_t nameiflags;
6496         char *fpath = NULL;
6497         char *spath = NULL;
6498         int   flen=0, slen=0;
6499         int from_truncated=0, to_truncated=0;
6500 #if CONFIG_FSE
6501         fse_info f_finfo, s_finfo;
6502 #endif
6503
6504         nameiflags = 0;
6505         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6506
6507     NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1,
6508                 UIO_USERSPACE, uap->path1, ctx);
6509
6510     error = namei(&fnd);
6511     if (error)
6512         goto out2;
6513
6514         nameidone(&fnd);
6515         fvp = fnd.ni_vp;
6516
6517     NDINIT(&snd, LOOKUP | CN_NBMOUNTLOOK, nameiflags | AUDITVNPATH2,
6518                 UIO_USERSPACE, uap->path2, ctx);
6519
6520     error = namei(&snd);
6521     if (error) {
6522                 vnode_put(fvp);
6523                 goto out2;
6524     }
6525         nameidone(&snd);
6526         svp = snd.ni_vp;
6527
6528         /*
6529          * if the files are the same, return an inval error
6530          */
6531         if (svp == fvp) {
6532                 error = EINVAL;
6533                 goto out;
6534         }
6535
6536         /*
6537          * if the files are on different volumes, return an error
6538          */
6539         if (svp->v_mount != fvp->v_mount) {
6540                 error = EXDEV;
6541                 goto out;
6542         }
6543
6544 #if CONFIG_MACF
6545         error = mac_vnode_check_exchangedata(ctx,
6546             fvp, svp);
6547         if (error)
6548                 goto out;
6549 #endif
6550         if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
6551             ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
6552                 goto out;
6553
6554         if (
6555 #if CONFIG_FSE
6556         need_fsevent(FSE_EXCHANGE, fvp) ||
6557 #endif
6558         kauth_authorize_fileop_has_listeners()) {
6559                 GET_PATH(fpath);
6560                 GET_PATH(spath);
6561                 if (fpath == NULL || spath == NULL) {
6562                         error = ENOMEM;
6563                         goto out;
6564                 }
6565
6566                 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
6567                 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
6568
6569 #if CONFIG_FSE
6570                 get_fse_info(fvp, &f_finfo, ctx);
6571                 get_fse_info(svp, &s_finfo, ctx);
6572                 if (from_truncated || to_truncated) {
6573                         // set it here since only the f_finfo gets reported up to user space
6574                         f_finfo.mode |= FSE_TRUNCATED_PATH;
6575                 }
6576 #endif
6577         }
6578         /* Ok, make the call */
6579         error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
6580
6581         if (error == 0) {
6582             const char *tmpname;
6583
6584             if (fpath != NULL && spath != NULL) {
6585                     /* call out to allow 3rd party notification of exchangedata.
6586                      * Ignore result of kauth_authorize_fileop call.
6587                      */
6588                     kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
6589                                            (uintptr_t)fpath, (uintptr_t)spath);
6590             }
6591             name_cache_lock();
6592
6593             tmpname     = fvp->v_name;
6594             fvp->v_name = svp->v_name;
6595             svp->v_name = tmpname;
6596
6597             if (fvp->v_parent != svp->v_parent) {
6598                 vnode_t tmp;
6599
6600                 tmp           = fvp->v_parent;
6601                 fvp->v_parent = svp->v_parent;
6602                 svp->v_parent = tmp;
6603             }
6604             name_cache_unlock();
6605
6606 #if CONFIG_FSE
6607             if (fpath != NULL && spath != NULL) {
6608                     add_fsevent(FSE_EXCHANGE, ctx,
6609                                 FSE_ARG_STRING, flen, fpath,
6610                                 FSE_ARG_FINFO, &f_finfo,
6611                                 FSE_ARG_STRING, slen, spath,
6612                                 FSE_ARG_FINFO, &s_finfo,
6613                                 FSE_ARG_DONE);
6614             }
6615 #endif
6616         }
6617
6618 out:
6619         if (fpath != NULL)
6620                 RELEASE_PATH(fpath);
6621         if (spath != NULL)
6622                 RELEASE_PATH(spath);
6623         vnode_put(svp);
6624         vnode_put(fvp);
6625 out2:
6626         return (error);
6627 }
6628
6629
6630 /* ARGSUSED */
6631
6632 int
6633 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
6634 {
6635         vnode_t vp;
6636         int error=0;
6637         int fserror = 0;
6638         struct nameidata nd;
6639         struct user64_fssearchblock searchblock;
6640         struct searchstate *state;
6641         struct attrlist *returnattrs;
6642         struct timeval timelimit;
6643         void *searchparams1,*searchparams2;
6644         uio_t auio = NULL;
6645         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6646         uint32_t nummatches;
6647         int mallocsize;
6648         uint32_t nameiflags;
6649         vfs_context_t ctx = vfs_context_current();
6650         char uio_buf[ UIO_SIZEOF(1) ];
6651
6652         /* Start by copying in fsearchblock paramater list */
6653     if (IS_64BIT_PROCESS(p)) {
6654         error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
6655         timelimit.tv_sec = searchblock.timelimit.tv_sec;
6656         timelimit.tv_usec = searchblock.timelimit.tv_usec;
6657     }
6658     else {
6659         struct user32_fssearchblock tmp_searchblock;
6660
6661         error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
6662         // munge into 64-bit version
6663         searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
6664         searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
6665         searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
6666         searchblock.maxmatches = tmp_searchblock.maxmatches;
6667                 /*
6668                  * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
6669                  * from a 32 bit long, and tv_usec is already a signed 32 bit int.
6670                  */
6671         timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
6672         timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
6673         searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
6674         searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
6675         searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
6676         searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
6677         searchblock.searchattrs = tmp_searchblock.searchattrs;
6678     }
6679         if (error)
6680                 return(error);
6681
6682         /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
6683          */
6684         if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
6685                 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
6686                 return(EINVAL);
6687
6688         /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
6689         /* It all has to do into local memory and it's not that big so we might as well  put it all together. */
6690         /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
6691         /* block.                                                                                             */
6692
6693         mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
6694                       sizeof(struct attrlist) + sizeof(struct searchstate);
6695
6696         MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
6697
6698         /* Now set up the various pointers to the correct place in our newly allocated memory */
6699
6700         searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
6701         returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
6702         state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
6703
6704         /* Now copy in the stuff given our local variables. */
6705
6706         if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
6707                 goto freeandexit;
6708
6709         if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
6710                 goto freeandexit;
6711
6712         if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
6713                 goto freeandexit;
6714
6715         if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
6716                 goto freeandexit;
6717
6718
6719         /*
6720          * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
6721          * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
6722          * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
6723          * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
6724          * validate the user-supplied data offset of the attrreference_t, we'll do it here.
6725          */
6726
6727         if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
6728                 attrreference_t* string_ref;
6729                 u_int32_t* start_length;
6730                 user64_size_t param_length;
6731
6732                 /* validate searchparams1 */
6733                 param_length = searchblock.sizeofsearchparams1;
6734                 /* skip the word that specifies length of the buffer */
6735                 start_length= (u_int32_t*) searchparams1;
6736                 start_length= start_length+1;
6737                 string_ref= (attrreference_t*) start_length;
6738
6739                 /* ensure no negative offsets or too big offsets */
6740                 if (string_ref->attr_dataoffset < 0 ) {
6741                         error = EINVAL;
6742                         goto freeandexit;
6743                 }
6744                 if (string_ref->attr_length > MAXPATHLEN) {
6745                         error = EINVAL;
6746                         goto freeandexit;
6747                 }
6748
6749                 /* Check for pointer overflow in the string ref */
6750                 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
6751                         error = EINVAL;
6752                         goto freeandexit;
6753                 }
6754
6755                 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
6756                         error = EINVAL;
6757                         goto freeandexit;
6758                 }
6759                 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
6760                         error = EINVAL;
6761                         goto freeandexit;
6762                 }
6763         }
6764
6765         /* set up the uio structure which will contain the users return buffer */
6766         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
6767                                                                   &uio_buf[0], sizeof(uio_buf));
6768     uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
6769
6770         nameiflags = 0;
6771         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6772         NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1,
6773                 UIO_USERSPACE, uap->path, ctx);
6774
6775         error = namei(&nd);
6776         if (error)
6777                 goto freeandexit;
6778
6779         nameidone(&nd);
6780         vp = nd.ni_vp;
6781
6782
6783         /*
6784          * If searchblock.maxmatches == 0, then skip the search. This has happened
6785          * before and sometimes the underlying code doesn't deal with it well.
6786          */
6787          if (searchblock.maxmatches == 0) {
6788                 nummatches = 0;
6789                 goto saveandexit;
6790          }
6791
6792         /*
6793            All right, we have everything we need, so let's make that call.
6794
6795            We keep special track of the return value from the file system:
6796            EAGAIN is an acceptable error condition that shouldn't keep us
6797            from copying out any results...
6798          */
6799
6800         fserror = VNOP_SEARCHFS(vp,
6801                                                         searchparams1,
6802                                                         searchparams2,
6803                                                         &searchblock.searchattrs,
6804                                                         (u_long)searchblock.maxmatches,
6805                                                         &timelimit,
6806                                                         returnattrs,
6807                                                         &nummatches,
6808                                                         (u_long)uap->scriptcode,
6809                                                         (u_long)uap->options,
6810                                                         auio,
6811                                                         state,
6812                                                         ctx);
6813
6814 saveandexit:
6815
6816         vnode_put(vp);
6817
6818         /* Now copy out the stuff that needs copying out. That means the number of matches and the
6819            search state.  Everything was already put into the return buffer by the vop call. */
6820
6821         if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
6822                 goto freeandexit;
6823
6824     if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6825                 goto freeandexit;
6826
6827         error = fserror;
6828
6829 freeandexit:
6830
6831         FREE(searchparams1,M_TEMP);
6832
6833         return(error);
6834
6835
6836 } /* end of searchfs system call */
6837
6838
6839 /*
6840  * Make a filesystem-specific control call:
6841  */
6842 /* ARGSUSED */
6843 static int
6844 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
6845 {
6846         int error=0;
6847         boolean_t is64bit;
6848         u_int size;
6849 #define STK_PARAMS 128
6850         char stkbuf[STK_PARAMS];
6851         caddr_t data, memp;
6852         vnode_t vp = *arg_vp;
6853
6854         size = IOCPARM_LEN(cmd);
6855         if (size > IOCPARM_MAX) return (EINVAL);
6856
6857     is64bit = proc_is64bit(p);
6858
6859         memp = NULL;
6860         if (size > sizeof (stkbuf)) {
6861                 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
6862                 data = memp;
6863         } else {
6864                 data = &stkbuf[0];
6865         };
6866
6867         if (cmd & IOC_IN) {
6868                 if (size) {
6869                         error = copyin(udata, data, size);
6870                         if (error) goto FSCtl_Exit;
6871                 } else {
6872                     if (is64bit) {
6873                         *(user_addr_t *)data = udata;
6874                     }
6875                     else {
6876                         *(uint32_t *)data = (uint32_t)udata;
6877                     }
6878                 };
6879         } else if ((cmd & IOC_OUT) && size) {
6880                 /*
6881                  * Zero the buffer so the user always
6882                  * gets back something deterministic.
6883                  */
6884                 bzero(data, size);
6885         } else if (cmd & IOC_VOID) {
6886                 if (is64bit) {
6887                     *(user_addr_t *)data = udata;
6888                 }
6889                 else {
6890                     *(uint32_t *)data = (uint32_t)udata;
6891                 }
6892         }
6893
6894         /* Check to see if it's a generic command */
6895         if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) {
6896                 mount_t mp = vp->v_mount;
6897                 int arg = *(uint32_t*)data;
6898
6899                 /* record vid of vp so we can drop it below. */
6900                 uint32_t vvid = vp->v_id;
6901
6902                 /*
6903                  * Then grab mount_iterref so that we can release the vnode.
6904                  * Without this, a thread may call vnode_iterate_prepare then
6905                  * get into a deadlock because we've never released the root vp
6906                  */
6907                 error = mount_iterref (mp, 0);
6908                 if (error)  {
6909                         goto FSCtl_Exit;
6910                 }
6911                 vnode_put(vp);
6912
6913                 /* issue the sync for this volume */
6914                 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
6915
6916                 /*
6917                  * Then release the mount_iterref once we're done syncing; it's not
6918                  * needed for the VNOP_IOCTL below
6919                  */
6920                 mount_iterdrop(mp);
6921
6922                 if (arg & FSCTL_SYNC_FULLSYNC) {
6923                         /* re-obtain vnode iocount on the root vp, if possible */
6924                         error = vnode_getwithvid (vp, vvid);
6925                         if (error == 0) {
6926                                 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
6927                                 vnode_put (vp);
6928                         }
6929                 }
6930                 /* mark the argument VP as having been released */
6931                 *arg_vp = NULL;
6932
6933         } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
6934             user_addr_t ext_strings;
6935             uint32_t    num_entries;
6936             uint32_t    max_width;
6937
6938             if (   (is64bit && size != sizeof(user64_package_ext_info))
6939                 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
6940
6941                 // either you're 64-bit and passed a 64-bit struct or
6942                 // you're 32-bit and passed a 32-bit struct.  otherwise
6943                 // it's not ok.
6944                 error = EINVAL;
6945                 goto FSCtl_Exit;
6946             }
6947
6948             if (is64bit) {
6949                 ext_strings = ((user64_package_ext_info *)data)->strings;
6950                 num_entries = ((user64_package_ext_info *)data)->num_entries;
6951                 max_width   = ((user64_package_ext_info *)data)->max_width;
6952             } else {
6953                 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
6954                 num_entries = ((user32_package_ext_info *)data)->num_entries;
6955                 max_width   = ((user32_package_ext_info *)data)->max_width;
6956             }
6957
6958             error = set_package_extensions_table(ext_strings, num_entries, max_width);
6959
6960         } else if (IOCBASECMD(cmd) == FSCTL_WAIT_FOR_SYNC) {
6961                 error = tsleep((caddr_t)&sync_wait_time, PVFS|PCATCH, "sync-wait", 0);
6962                 if (error == 0) {
6963                         *(uint32_t *)data = (uint32_t)sync_wait_time;
6964                         error = 0;
6965                 } else {
6966                         error *= -1;
6967                 }
6968
6969         } else {
6970                 /* Invoke the filesystem-specific code */
6971                 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
6972         }
6973
6974
6975         /*
6976          * Copy any data to user, size was
6977          * already set and checked above.
6978          */
6979         if (error == 0 && (cmd & IOC_OUT) && size)
6980                 error = copyout(data, udata, size);
6981
6982 FSCtl_Exit:
6983         if (memp) kfree(memp, size);
6984
6985         return error;
6986 }
6987
6988 /* ARGSUSED */
6989 int
6990 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
6991 {
6992         int error;
6993         struct nameidata nd;
6994         u_long nameiflags;
6995         vnode_t vp = NULL;
6996         vfs_context_t ctx = vfs_context_current();
6997
6998         AUDIT_ARG(cmd, uap->cmd);
6999         AUDIT_ARG(value32, uap->options);
7000         /* Get the vnode for the file we are getting info on:  */
7001         nameiflags = 0;
7002         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7003         NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE,
7004             uap->path, ctx);
7005         if ((error = namei(&nd))) goto done;
7006         vp = nd.ni_vp;
7007         nameidone(&nd);
7008
7009 #if CONFIG_MACF
7010         error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
7011         if (error) {
7012                 goto done;
7013         }
7014 #endif
7015
7016         error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
7017
7018 done:
7019         if (vp)
7020                 vnode_put(vp);
7021         return error;
7022 }
7023 /* ARGSUSED */
7024 int
7025 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
7026 {
7027         int error;
7028         vnode_t vp = NULL;
7029         vfs_context_t ctx = vfs_context_current();
7030         int fd = -1;
7031
7032         AUDIT_ARG(fd, uap->fd);
7033         AUDIT_ARG(cmd, uap->cmd);
7034         AUDIT_ARG(value32, uap->options);
7035
7036         /* Get the vnode for the file we are getting info on:  */
7037         if ((error = file_vnode(uap->fd, &vp)))
7038                 goto done;
7039         fd = uap->fd;
7040         if ((error = vnode_getwithref(vp))) {
7041                 goto done;
7042         }
7043
7044 #if CONFIG_MACF
7045         error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
7046         if (error) {
7047                 goto done;
7048         }
7049 #endif
7050
7051         error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
7052
7053 done:
7054         if (fd != -1)
7055                 file_drop(fd);
7056
7057         if (vp)
7058                 vnode_put(vp);
7059         return error;
7060 }
7061 /* end of fsctl system call */
7062
7063 /*
7064  * An in-kernel sync for power management to call.
7065  */
7066 __private_extern__ int
7067 sync_internal(void)
7068 {
7069         int error;
7070
7071         struct sync_args data;
7072
7073         int retval[2];
7074
7075
7076         error = sync(current_proc(), &data, &retval[0]);
7077
7078
7079         return (error);
7080 } /* end of sync_internal call */
7081
7082
7083 /*
7084  *  Retrieve the data of an extended attribute.
7085  */
7086 int
7087 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
7088 {
7089         vnode_t vp;
7090         struct nameidata nd;
7091         char attrname[XATTR_MAXNAMELEN+1];
7092         vfs_context_t ctx = vfs_context_current();
7093         uio_t auio = NULL;
7094         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7095         size_t attrsize = 0;
7096         size_t namelen;
7097         u_int32_t nameiflags;
7098         int error;
7099         char uio_buf[ UIO_SIZEOF(1) ];
7100
7101         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7102                 return (EINVAL);
7103
7104         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7105         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7106         if ((error = namei(&nd))) {
7107                 return (error);
7108         }
7109         vp = nd.ni_vp;
7110         nameidone(&nd);
7111
7112         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7113                 goto out;
7114         }
7115         if (xattr_protected(attrname)) {
7116                 error = EPERM;
7117                 goto out;
7118         }
7119         /*
7120          * the specific check for 0xffffffff is a hack to preserve
7121          * binaray compatibilty in K64 with applications that discovered
7122          * that passing in a buf pointer and a size of -1 resulted in
7123          * just the size of the indicated extended attribute being returned.
7124          * this isn't part of the documented behavior, but because of the
7125          * original implemtation's check for "uap->size > 0", this behavior
7126          * was allowed. In K32 that check turned into a signed comparison
7127          * even though uap->size is unsigned...  in K64, we blow by that
7128          * check because uap->size is unsigned and doesn't get sign smeared
7129          * in the munger for a 32 bit user app.  we also need to add a
7130          * check to limit the maximum size of the buffer being passed in...
7131          * unfortunately, the underlying fileystems seem to just malloc
7132          * the requested size even if the actual extended attribute is tiny.
7133          * because that malloc is for kernel wired memory, we have to put a
7134          * sane limit on it.
7135          *
7136          * U32 running on K64 will yield 0x00000000ffffffff for uap->size
7137          * U64 running on K64 will yield -1 (64 bits wide)
7138          * U32/U64 running on K32 will yield -1 (32 bits wide)
7139          */
7140         if (uap->size == 0xffffffff || uap->size == (size_t)-1)
7141                 goto no_uio;
7142
7143         if (uap->size > (size_t)XATTR_MAXSIZE)
7144                 uap->size = XATTR_MAXSIZE;
7145
7146         if (uap->value) {
7147                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
7148                                             &uio_buf[0], sizeof(uio_buf));
7149                 uio_addiov(auio, uap->value, uap->size);
7150         }
7151 no_uio:
7152         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
7153 out:
7154         vnode_put(vp);
7155
7156         if (auio) {
7157                 *retval = uap->size - uio_resid(auio);
7158         } else {
7159                 *retval = (user_ssize_t)attrsize;
7160         }
7161
7162         return (error);
7163 }
7164
7165 /*
7166  * Retrieve the data of an extended attribute.
7167  */
7168 int
7169 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
7170 {
7171         vnode_t vp;
7172         char attrname[XATTR_MAXNAMELEN+1];
7173         uio_t auio = NULL;
7174         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7175         size_t attrsize = 0;
7176         size_t namelen;
7177         int error;
7178         char uio_buf[ UIO_SIZEOF(1) ];
7179
7180         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7181                 return (EINVAL);
7182
7183         if ( (error = file_vnode(uap->fd, &vp)) ) {
7184                 return (error);
7185         }
7186         if ( (error = vnode_getwithref(vp)) ) {
7187                 file_drop(uap->fd);
7188                 return(error);
7189         }
7190         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7191                 goto out;
7192         }
7193         if (xattr_protected(attrname)) {
7194                 error = EPERM;
7195                 goto out;
7196         }
7197         if (uap->value && uap->size > 0) {
7198                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
7199                                             &uio_buf[0], sizeof(uio_buf));
7200                 uio_addiov(auio, uap->value, uap->size);
7201         }
7202
7203         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
7204 out:
7205         (void)vnode_put(vp);
7206         file_drop(uap->fd);
7207
7208         if (auio) {
7209                 *retval = uap->size - uio_resid(auio);
7210         } else {
7211                 *retval = (user_ssize_t)attrsize;
7212         }
7213         return (error);
7214 }
7215
7216 /*
7217  * Set the data of an extended attribute.
7218  */
7219 int
7220 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
7221 {
7222         vnode_t vp;
7223         struct nameidata nd;
7224         char attrname[XATTR_MAXNAMELEN+1];
7225         vfs_context_t ctx = vfs_context_current();
7226         uio_t auio = NULL;
7227         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7228         size_t namelen;
7229         u_int32_t nameiflags;
7230         int error;
7231         char uio_buf[ UIO_SIZEOF(1) ];
7232
7233         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7234                 return (EINVAL);
7235
7236         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7237                 return (error);
7238         }
7239         if (xattr_protected(attrname))
7240                 return(EPERM);
7241         if (uap->size != 0 && uap->value == 0) {
7242                 return (EINVAL);
7243         }
7244
7245         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7246         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7247         if ((error = namei(&nd))) {
7248                 return (error);
7249         }
7250         vp = nd.ni_vp;
7251         nameidone(&nd);
7252
7253         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
7254                                     &uio_buf[0], sizeof(uio_buf));
7255         uio_addiov(auio, uap->value, uap->size);
7256
7257         error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
7258 #if CONFIG_FSE
7259         if (error == 0) {
7260                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
7261                     FSE_ARG_VNODE, vp,
7262                     FSE_ARG_DONE);
7263         }
7264 #endif
7265         vnode_put(vp);
7266         *retval = 0;
7267         return (error);
7268 }
7269
7270 /*
7271  * Set the data of an extended attribute.
7272  */
7273 int
7274 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
7275 {
7276         vnode_t vp;
7277         char attrname[XATTR_MAXNAMELEN+1];
7278         uio_t auio = NULL;
7279         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7280         size_t namelen;
7281         int error;
7282         char uio_buf[ UIO_SIZEOF(1) ];
7283         vfs_context_t ctx = vfs_context_current();
7284
7285         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7286                 return (EINVAL);
7287
7288         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7289                 return (error);
7290         }
7291         if (xattr_protected(attrname))
7292                 return(EPERM);
7293         if (uap->size != 0 && uap->value == 0) {
7294                 return (EINVAL);
7295         }
7296         if ( (error = file_vnode(uap->fd, &vp)) ) {
7297                 return (error);
7298         }
7299         if ( (error = vnode_getwithref(vp)) ) {
7300                 file_drop(uap->fd);
7301                 return(error);
7302         }
7303         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
7304                                     &uio_buf[0], sizeof(uio_buf));
7305         uio_addiov(auio, uap->value, uap->size);
7306
7307         error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
7308 #if CONFIG_FSE
7309         if (error == 0) {
7310                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
7311                     FSE_ARG_VNODE, vp,
7312                     FSE_ARG_DONE);
7313         }
7314 #endif
7315         vnode_put(vp);
7316         file_drop(uap->fd);
7317         *retval = 0;
7318         return (error);
7319 }
7320
7321 /*
7322  * Remove an extended attribute.
7323  * XXX Code duplication here.
7324  */
7325 int
7326 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
7327 {
7328         vnode_t vp;
7329         struct nameidata nd;
7330         char attrname[XATTR_MAXNAMELEN+1];
7331         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7332         vfs_context_t ctx = vfs_context_current();
7333         size_t namelen;
7334         u_int32_t nameiflags;
7335         int error;
7336
7337         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7338                 return (EINVAL);
7339
7340         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
7341         if (error != 0) {
7342                 return (error);
7343         }
7344         if (xattr_protected(attrname))
7345                 return(EPERM);
7346         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7347         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7348         if ((error = namei(&nd))) {
7349                 return (error);
7350         }
7351         vp = nd.ni_vp;
7352         nameidone(&nd);
7353
7354         error = vn_removexattr(vp, attrname, uap->options, ctx);
7355 #if CONFIG_FSE
7356         if (error == 0) {
7357                 add_fsevent(FSE_XATTR_REMOVED, ctx,
7358                     FSE_ARG_VNODE, vp,
7359                     FSE_ARG_DONE);
7360         }
7361 #endif
7362         vnode_put(vp);
7363         *retval = 0;
7364         return (error);
7365 }
7366
7367 /*
7368  * Remove an extended attribute.
7369  * XXX Code duplication here.
7370  */
7371 int
7372 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
7373 {
7374         vnode_t vp;
7375         char attrname[XATTR_MAXNAMELEN+1];
7376         size_t namelen;
7377         int error;
7378         vfs_context_t ctx = vfs_context_current();
7379
7380         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7381                 return (EINVAL);
7382
7383         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
7384         if (error != 0) {
7385                 return (error);
7386         }
7387         if (xattr_protected(attrname))
7388                 return(EPERM);
7389         if ( (error = file_vnode(uap->fd, &vp)) ) {
7390                 return (error);
7391         }
7392         if ( (error = vnode_getwithref(vp)) ) {
7393                 file_drop(uap->fd);
7394                 return(error);
7395         }
7396
7397         error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
7398 #if CONFIG_FSE
7399         if (error == 0) {
7400                 add_fsevent(FSE_XATTR_REMOVED, ctx,
7401                     FSE_ARG_VNODE, vp,
7402                     FSE_ARG_DONE);
7403         }
7404 #endif
7405         vnode_put(vp);
7406         file_drop(uap->fd);
7407         *retval = 0;
7408         return (error);
7409 }
7410
7411 /*
7412  * Retrieve the list of extended attribute names.
7413  * XXX Code duplication here.
7414  */
7415 int
7416 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
7417 {
7418         vnode_t vp;
7419         struct nameidata nd;
7420         vfs_context_t ctx = vfs_context_current();
7421         uio_t auio = NULL;
7422         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7423         size_t attrsize = 0;
7424         u_int32_t nameiflags;
7425         int error;
7426         char uio_buf[ UIO_SIZEOF(1) ];
7427
7428         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7429                 return (EINVAL);
7430
7431         nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
7432         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7433         if ((error = namei(&nd))) {
7434                 return (error);
7435         }
7436         vp = nd.ni_vp;
7437         nameidone(&nd);
7438         if (uap->namebuf != 0 && uap->bufsize > 0) {
7439                 auio = uio_createwithbuffer(1, 0, spacetype,
7440                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
7441                 uio_addiov(auio, uap->namebuf, uap->bufsize);
7442         }
7443
7444         error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
7445
7446         vnode_put(vp);
7447         if (auio) {
7448                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
7449         } else {
7450                 *retval = (user_ssize_t)attrsize;
7451         }
7452         return (error);
7453 }
7454
7455 /*
7456  * Retrieve the list of extended attribute names.
7457  * XXX Code duplication here.
7458  */
7459 int
7460 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
7461 {
7462         vnode_t vp;
7463         uio_t auio = NULL;
7464         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7465         size_t attrsize = 0;
7466         int error;
7467         char uio_buf[ UIO_SIZEOF(1) ];
7468
7469         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7470                 return (EINVAL);
7471
7472         if ( (error = file_vnode(uap->fd, &vp)) ) {
7473                 return (error);
7474         }
7475         if ( (error = vnode_getwithref(vp)) ) {
7476                 file_drop(uap->fd);
7477                 return(error);
7478         }
7479         if (uap->namebuf != 0 && uap->bufsize > 0) {
7480                 auio = uio_createwithbuffer(1, 0, spacetype,
7481                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
7482                 uio_addiov(auio, uap->namebuf, uap->bufsize);
7483         }
7484
7485         error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
7486
7487         vnode_put(vp);
7488         file_drop(uap->fd);
7489         if (auio) {
7490                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
7491         } else {
7492                 *retval = (user_ssize_t)attrsize;
7493         }
7494         return (error);
7495 }
7496
7497 /*
7498  * Obtain the full pathname of a file system object by id.
7499  *
7500  * This is a private SPI used by the File Manager.
7501  */
7502 __private_extern__
7503 int
7504 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
7505 {
7506         vnode_t vp;
7507         struct mount *mp = NULL;
7508         vfs_context_t ctx = vfs_context_current();
7509         fsid_t fsid;
7510         char *realpath;
7511         int bpflags;
7512         int length;
7513         int error;
7514
7515         if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
7516                 return (error);
7517         }
7518         AUDIT_ARG(value32, fsid.val[0]);
7519         AUDIT_ARG(value64, uap->objid);
7520         /* Restrict output buffer size for now. */
7521         if (uap->bufsize > PAGE_SIZE) {
7522                 return (EINVAL);
7523         }
7524         MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
7525         if (realpath == NULL) {
7526                 return (ENOMEM);
7527         }
7528         /* Find the target mountpoint. */
7529         if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) {
7530                 error = ENOTSUP;  /* unexpected failure */
7531                 goto out;
7532         }
7533         /* Find the target vnode. */
7534         if (uap->objid == 2) {
7535                 error = VFS_ROOT(mp, &vp, ctx);
7536         } else {
7537                 error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx);
7538         }
7539         vfs_unbusy(mp);
7540         if (error) {
7541                 goto out;
7542         }
7543         /* Obtain the absolute path to this vnode. */
7544         bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
7545         error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
7546         vnode_put(vp);
7547         if (error) {
7548                 goto out;
7549         }
7550         AUDIT_ARG(text, realpath);
7551         error = copyout((caddr_t)realpath, uap->buf, length);
7552
7553         *retval = (user_ssize_t)length; /* may be superseded by error */
7554 out:
7555         if (realpath) {
7556                 FREE(realpath, M_TEMP);
7557         }
7558         return (error);
7559 }
7560
7561 /*
7562  * Common routine to handle various flavors of statfs data heading out
7563  *      to user space.
7564  *
7565  * Returns:     0                       Success
7566  *              EFAULT
7567  */
7568 static int
7569 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
7570     user_addr_t bufp, int *sizep, boolean_t is_64_bit,
7571     boolean_t partial_copy)
7572 {
7573         int             error;
7574         int             my_size, copy_size;
7575
7576         if (is_64_bit) {
7577                 struct user64_statfs sfs;
7578                 my_size = copy_size = sizeof(sfs);
7579                 bzero(&sfs, my_size);
7580                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
7581                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
7582                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
7583                 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
7584                 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
7585                 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
7586                 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
7587                 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
7588                 sfs.f_files = (user64_long_t)sfsp->f_files;
7589                 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
7590                 sfs.f_fsid = sfsp->f_fsid;
7591                 sfs.f_owner = sfsp->f_owner;
7592                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
7593                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
7594                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
7595
7596                 if (partial_copy) {
7597                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
7598                 }
7599                 error = copyout((caddr_t)&sfs, bufp, copy_size);
7600         }
7601         else {
7602                 struct user32_statfs sfs;
7603
7604                 my_size = copy_size = sizeof(sfs);
7605                 bzero(&sfs, my_size);
7606
7607                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
7608                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
7609                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
7610
7611                 /*
7612                  * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
7613                  * have to fudge the numbers here in that case.   We inflate the blocksize in order
7614                  * to reflect the filesystem size as best we can.
7615                  */
7616                 if ((sfsp->f_blocks > INT_MAX)
7617                         /* Hack for 4061702 . I think the real fix is for Carbon to
7618                          * look for some volume capability and not depend on hidden
7619                          * semantics agreed between a FS and carbon.
7620                          * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
7621                          * for Carbon to set bNoVolumeSizes volume attribute.
7622                          * Without this the webdavfs files cannot be copied onto
7623                          * disk as they look huge. This change should not affect
7624                          * XSAN as they should not setting these to -1..
7625                          */
7626                          && (sfsp->f_blocks != 0xffffffffffffffffULL)
7627                          && (sfsp->f_bfree != 0xffffffffffffffffULL)
7628                          && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
7629                         int             shift;
7630
7631                         /*
7632                          * Work out how far we have to shift the block count down to make it fit.
7633                          * Note that it's possible to have to shift so far that the resulting
7634                          * blocksize would be unreportably large.  At that point, we will clip
7635                          * any values that don't fit.
7636                          *
7637                          * For safety's sake, we also ensure that f_iosize is never reported as
7638                          * being smaller than f_bsize.
7639                          */
7640                         for (shift = 0; shift < 32; shift++) {
7641                                 if ((sfsp->f_blocks >> shift) <= INT_MAX)
7642                                         break;
7643                                 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
7644                                         break;
7645                         }
7646 #define __SHIFT_OR_CLIP(x, s)   ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
7647                         sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
7648                         sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
7649                         sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
7650 #undef __SHIFT_OR_CLIP
7651                         sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
7652                         sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
7653                 } else {
7654                         /* filesystem is small enough to be reported honestly */
7655                         sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
7656                         sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
7657                         sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
7658                         sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
7659                         sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
7660                 }
7661                 sfs.f_files = (user32_long_t)sfsp->f_files;
7662                 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
7663                 sfs.f_fsid = sfsp->f_fsid;
7664                 sfs.f_owner = sfsp->f_owner;
7665                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
7666                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
7667                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
7668
7669                 if (partial_copy) {
7670                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
7671                 }
7672                 error = copyout((caddr_t)&sfs, bufp, copy_size);
7673         }
7674
7675         if (sizep != NULL) {
7676                 *sizep = my_size;
7677         }
7678         return(error);
7679 }
7680
7681 /*
7682  * copy stat structure into user_stat structure.
7683  */
7684 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
7685 {
7686         bzero(usbp, sizeof(*usbp));
7687
7688         usbp->st_dev = sbp->st_dev;
7689         usbp->st_ino = sbp->st_ino;
7690         usbp->st_mode = sbp->st_mode;
7691         usbp->st_nlink = sbp->st_nlink;
7692         usbp->st_uid = sbp->st_uid;
7693         usbp->st_gid = sbp->st_gid;
7694         usbp->st_rdev = sbp->st_rdev;
7695 #ifndef _POSIX_C_SOURCE
7696         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
7697         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
7698         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
7699         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
7700         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
7701         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
7702 #else
7703         usbp->st_atime = sbp->st_atime;
7704         usbp->st_atimensec = sbp->st_atimensec;
7705         usbp->st_mtime = sbp->st_mtime;
7706         usbp->st_mtimensec = sbp->st_mtimensec;
7707         usbp->st_ctime = sbp->st_ctime;
7708         usbp->st_ctimensec = sbp->st_ctimensec;
7709 #endif
7710         usbp->st_size = sbp->st_size;
7711         usbp->st_blocks = sbp->st_blocks;
7712         usbp->st_blksize = sbp->st_blksize;
7713         usbp->st_flags = sbp->st_flags;
7714         usbp->st_gen = sbp->st_gen;
7715         usbp->st_lspare = sbp->st_lspare;
7716         usbp->st_qspare[0] = sbp->st_qspare[0];
7717         usbp->st_qspare[1] = sbp->st_qspare[1];
7718 }
7719
7720 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
7721 {
7722         bzero(usbp, sizeof(*usbp));
7723
7724         usbp->st_dev = sbp->st_dev;
7725         usbp->st_ino = sbp->st_ino;
7726         usbp->st_mode = sbp->st_mode;
7727         usbp->st_nlink = sbp->st_nlink;
7728         usbp->st_uid = sbp->st_uid;
7729         usbp->st_gid = sbp->st_gid;
7730         usbp->st_rdev = sbp->st_rdev;
7731 #ifndef _POSIX_C_SOURCE
7732         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
7733         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
7734         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
7735         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
7736         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
7737         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
7738 #else
7739         usbp->st_atime = sbp->st_atime;
7740         usbp->st_atimensec = sbp->st_atimensec;
7741         usbp->st_mtime = sbp->st_mtime;
7742         usbp->st_mtimensec = sbp->st_mtimensec;
7743         usbp->st_ctime = sbp->st_ctime;
7744         usbp->st_ctimensec = sbp->st_ctimensec;
7745 #endif
7746         usbp->st_size = sbp->st_size;
7747         usbp->st_blocks = sbp->st_blocks;
7748         usbp->st_blksize = sbp->st_blksize;
7749         usbp->st_flags = sbp->st_flags;
7750         usbp->st_gen = sbp->st_gen;
7751         usbp->st_lspare = sbp->st_lspare;
7752         usbp->st_qspare[0] = sbp->st_qspare[0];
7753         usbp->st_qspare[1] = sbp->st_qspare[1];
7754 }
7755
7756 /*
7757  * copy stat64 structure into user_stat64 structure.
7758  */
7759 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
7760 {
7761         bzero(usbp, sizeof(*usbp));
7762
7763         usbp->st_dev = sbp->st_dev;
7764         usbp->st_ino = sbp->st_ino;
7765         usbp->st_mode = sbp->st_mode;
7766         usbp->st_nlink = sbp->st_nlink;
7767         usbp->st_uid = sbp->st_uid;
7768         usbp->st_gid = sbp->st_gid;
7769         usbp->st_rdev = sbp->st_rdev;
7770 #ifndef _POSIX_C_SOURCE
7771         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
7772         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
7773         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
7774         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
7775         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
7776         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
7777         usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
7778         usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
7779 #else
7780         usbp->st_atime = sbp->st_atime;
7781         usbp->st_atimensec = sbp->st_atimensec;
7782         usbp->st_mtime = sbp->st_mtime;
7783         usbp->st_mtimensec = sbp->st_mtimensec;
7784         usbp->st_ctime = sbp->st_ctime;
7785         usbp->st_ctimensec = sbp->st_ctimensec;
7786         usbp->st_birthtime = sbp->st_birthtime;
7787         usbp->st_birthtimensec = sbp->st_birthtimensec;
7788 #endif
7789         usbp->st_size = sbp->st_size;
7790         usbp->st_blocks = sbp->st_blocks;
7791         usbp->st_blksize = sbp->st_blksize;
7792         usbp->st_flags = sbp->st_flags;
7793         usbp->st_gen = sbp->st_gen;
7794         usbp->st_lspare = sbp->st_lspare;
7795         usbp->st_qspare[0] = sbp->st_qspare[0];
7796         usbp->st_qspare[1] = sbp->st_qspare[1];
7797 }
7798
7799 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
7800 {
7801         bzero(usbp, sizeof(*usbp));
7802
7803         usbp->st_dev = sbp->st_dev;
7804         usbp->st_ino = sbp->st_ino;
7805         usbp->st_mode = sbp->st_mode;
7806         usbp->st_nlink = sbp->st_nlink;
7807         usbp->st_uid = sbp->st_uid;
7808         usbp->st_gid = sbp->st_gid;
7809         usbp->st_rdev = sbp->st_rdev;
7810 #ifndef _POSIX_C_SOURCE
7811         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
7812         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
7813         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
7814         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
7815         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
7816         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
7817         usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
7818         usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
7819 #else
7820         usbp->st_atime = sbp->st_atime;
7821         usbp->st_atimensec = sbp->st_atimensec;
7822         usbp->st_mtime = sbp->st_mtime;
7823         usbp->st_mtimensec = sbp->st_mtimensec;
7824         usbp->st_ctime = sbp->st_ctime;
7825         usbp->st_ctimensec = sbp->st_ctimensec;
7826         usbp->st_birthtime = sbp->st_birthtime;
7827         usbp->st_birthtimensec = sbp->st_birthtimensec;
7828 #endif
7829         usbp->st_size = sbp->st_size;
7830         usbp->st_blocks = sbp->st_blocks;
7831         usbp->st_blksize = sbp->st_blksize;
7832         usbp->st_flags = sbp->st_flags;
7833         usbp->st_gen = sbp->st_gen;
7834         usbp->st_lspare = sbp->st_lspare;
7835         usbp->st_qspare[0] = sbp->st_qspare[0];
7836         usbp->st_qspare[1] = sbp->st_qspare[1];
7837 }