bsd/vfs/vfs_syscalls.c

   1 /*
   2  * Copyright (c) 1995-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1989, 1993
  30  *      The Regents of the University of California.  All rights reserved.
  31  * (c) UNIX System Laboratories, Inc.
  32  * All or some portions of this file are derived from material licensed
  33  * to the University of California by American Telephone and Telegraph
  34  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  35  * the permission of UNIX System Laboratories, Inc.
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions
  39  * are met:
  40  * 1. Redistributions of source code must retain the above copyright
  41  *    notice, this list of conditions and the following disclaimer.
  42  * 2. Redistributions in binary form must reproduce the above copyright
  43  *    notice, this list of conditions and the following disclaimer in the
  44  *    documentation and/or other materials provided with the distribution.
  45  * 3. All advertising materials mentioning features or use of this software
  46  *    must display the following acknowledgement:
  47  *      This product includes software developed by the University of
  48  *      California, Berkeley and its contributors.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  *
  65  *      @(#)vfs_syscalls.c      8.41 (Berkeley) 6/15/95
  66  */
  67 /*
  68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  69  * support for mandatory and extensible security protections.  This notice
  70  * is included in support of clause 2.2 (b) of the Apple Public License,
  71  * Version 2.0.
  72  */
  73
  74 #include <sys/param.h>
  75 #include <sys/systm.h>
  76 #include <sys/namei.h>
  77 #include <sys/filedesc.h>
  78 #include <sys/kernel.h>
  79 #include <sys/file_internal.h>
  80 #include <sys/stat.h>
  81 #include <sys/vnode_internal.h>
  82 #include <sys/mount_internal.h>
  83 #include <sys/proc_internal.h>
  84 #include <sys/kauth.h>
  85 #include <sys/uio_internal.h>
  86 #include <sys/malloc.h>
  87 #include <sys/mman.h>
  88 #include <sys/dirent.h>
  89 #include <sys/attr.h>
  90 #include <sys/sysctl.h>
  91 #include <sys/ubc.h>
  92 #include <sys/quota.h>
  93 #include <sys/kdebug.h>
  94 #include <sys/fsevents.h>
  95 #include <sys/sysproto.h>
  96 #include <sys/xattr.h>
  97 #include <sys/fcntl.h>
  98 #include <sys/fsctl.h>
  99 #include <sys/ubc_internal.h>
 100 #include <sys/disk.h>
 101 #include <machine/cons.h>
 102 #include <machine/limits.h>
 103 #include <miscfs/specfs/specdev.h>
 104 #include <miscfs/union/union.h>
 105
 106 #include <security/audit/audit.h>
 107 #include <bsm/audit_kevents.h>
 108
 109 #include <mach/mach_types.h>
 110 #include <kern/kern_types.h>
 111 #include <kern/kalloc.h>
 112
 113 #include <vm/vm_pageout.h>
 114
 115 #include <libkern/OSAtomic.h>
 116 #include <pexpert/pexpert.h>
 117
 118 #if CONFIG_MACF
 119 #include <security/mac.h>
 120 #include <security/mac_framework.h>
 121 #endif
 122
  123 #if CONFIG_FSE /* path buffers are obtained with get_pathbuff() and returned with release_pathbuff() */
  124 #define GET_PATH(x) \
  125         (x) = get_pathbuff();
  126 #define RELEASE_PATH(x) \
  127         release_pathbuff(x);
  128 #else /* !CONFIG_FSE: MAXPATHLEN-byte char buffers via MALLOC_ZONE/FREE_ZONE with type M_NAMEI */
  129 #define GET_PATH(x)     \
  130         MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
  131 #define RELEASE_PATH(x) \
  132         FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
  133 #endif /* CONFIG_FSE -- NOTE(review): each expansion already ends in ';', so "GET_PATH(p);" adds an empty statement; brace any if/else body that uses these */
 134
  135 /* State for the checkdirs iteration (see checkdirs() and checkdirs_callback()). */
  136 struct cdirargs {
  137         vnode_t olddp;  /* NOTE(review): presumably the directory vnode being replaced -- confirm */
  138         vnode_t newdp;  /* NOTE(review): presumably the vnode that replaces it -- confirm */
  139 };
  140 /* Callback for the checkdirs iteration; receives a process and an opaque argument. */
  141 static int checkdirs_callback(proc_t p, void * arg);
 142
  143 static int change_dir(struct nameidata *ndp, vfs_context_t ctx); /* prototypes below are file-local (static) unless noted */
  144 static int checkdirs(vnode_t olddp, vfs_context_t ctx); /* called by __mac_mount() once a new mount is installed on the covered vnode */
  145 void enablequotas(struct mount *mp, vfs_context_t ctx); /* not static; __mac_mount() calls it after a successful MNT_UPDATE */
  146 static int getfsstat_callback(mount_t mp, void * arg);
  147 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
  148 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
  149 static int sync_callback(mount_t, void *);
  150 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
  151                         user_addr_t bufp, int *sizep, boolean_t is_64_bit,
  152                                                 boolean_t partial_copy);
  153 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
  154                         user_addr_t bufp);
  155 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
 156
  157 #ifdef CONFIG_IMGSRC_ACCESS /* __mac_mount() hands requests whose flags equal MNT_IMGSRC to relocate_imageboot_source() */
  158 static int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname);
  159 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
  160 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
  161 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
  162 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
  163 static void mount_end_update(mount_t mp);
  164 static int relocate_imageboot_source(vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs);
  165 #endif /* CONFIG_IMGSRC_ACCESS */
 166
  167 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t); /* function-pointer hook; NOTE(review): presumably installed by the union filesystem -- confirm */
  168
  169 __private_extern__ /* non-static prototype; NOTE(review): presumably shared with other kernel files only -- confirm */
  170 int sync_internal(void);
  171
  172 __private_extern__
  173 int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *);
  174
  175 __private_extern__
  176 int unlink1(vfs_context_t, struct nameidata *, int);
 177
 178
  179 #ifdef __APPLE_API_OBSOLETE /* argument structures and prototypes for fstatv(), lstatv(), mkcomplex() and statv() */
  180 struct fstatv_args { /* 'uap' type of fstatv() */
  181        int fd;                  /* file descriptor of the target file */
  182        struct vstat *vsb;       /* vstat structure for returned info  */
  183 };
  184 struct lstatv_args { /* 'uap' type of lstatv() */
  185        const char *path;        /* pathname of the target file       */
  186        struct vstat *vsb;       /* vstat structure for returned info */
  187 };
  188 struct mkcomplex_args { /* 'uap' type of mkcomplex() */
  189         const char *path;       /* pathname of the file to be created */
  190                 mode_t mode;            /* access mode for the newly created file */
  191         u_int32_t type;         /* format of the complex file */
  192 };
  193 struct statv_args { /* 'uap' type of statv() */
  194         const char *path;       /* pathname of the target file       */
  195         struct vstat *vsb;      /* vstat structure for returned info */
  196 };
  197
  198 int fstatv(proc_t p, struct fstatv_args *uap, int32_t *retval);
  199 int lstatv(proc_t p, struct lstatv_args *uap, int32_t *retval);
  200 int mkcomplex(proc_t p, struct mkcomplex_args *uap, int32_t *retval);
  201 int statv(proc_t p, struct statv_args *uap, int32_t *retval);
  202
  203 #endif /* __APPLE_API_OBSOLETE */
 204
  205 /*
  206  * Incremented each time a mount or unmount operation occurs (in
  207  * __mac_mount() the increment is done while holding name_cache_lock).  Used to
  208  * invalidate the cached rootvp value in the mount structure utilized by cache_lookup_path.
  209  */
  210 uint32_t mount_generation = 0;
  211
  212 /* Counts the number of mount and unmount operations. */
  213 unsigned int vfs_nummntops=0;
  214
  215 extern struct fileops vnops;
  216 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
 217
 218
 219 /*
 220  * Virtual File System System Calls
 221  */
 222
 223 /*
 224  * Mount a file system.
 225  */
 226 /* ARGSUSED */
 227 int
 228 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
 229 {
 230         struct __mac_mount_args muap;
 231
 232         muap.type = uap->type;
 233         muap.path = uap->path;
 234         muap.flags = uap->flags;
 235         muap.data = uap->data;
 236         muap.mac_p = USER_ADDR_NULL;
 237         return (__mac_mount(p, &muap, retval));
 238 }
 239
 240 /*
 241  * __mac_mount:
 242  *      Mount a file system taking into account MAC label behavior.
 243  *      See mount(2) man page for more information
 244  *
 245  * Parameters:    p                        Process requesting the mount
 246  *                uap                      User argument descriptor (see below)
 247  *                retval                   (ignored)
 248  *
 249  * Indirect:      uap->type                Filesystem type
 250  *                uap->path                Path to mount
 251  *                uap->data                Mount arguments
 252  *                uap->mac_p               MAC info
 253  *                uap->flags               Mount flags
 254  *
 255  *
 256  * Returns:        0                       Success
 257  *                !0                       Not success
 258  */
 259 int
 260 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
 261 {
 262         struct vnode *vp, *pvp;
 263         struct vnode *devvp = NULLVP;
 264         struct vnode *device_vnode = NULLVP;
 265 #if CONFIG_MACF
 266         struct vnode *rvp;
 267 #endif
 268         struct mount *mp;
 269         struct vfstable *vfsp = (struct vfstable *)0;
 270         int error, flag = 0;
 271         struct vnode_attr va;
 272         vfs_context_t ctx = vfs_context_current();
 273         struct nameidata nd;
 274         struct nameidata nd1;
 275         char fstypename[MFSNAMELEN];
 276         size_t dummy=0;
 277         user_addr_t devpath = USER_ADDR_NULL;
 278         user_addr_t fsmountargs =  uap->data;
 279         int ronly = 0;
 280         int mntalloc = 0;
 281         boolean_t vfsp_ref = FALSE;
 282         mode_t accessmode;
 283         boolean_t is_64bit;
 284         boolean_t is_rwlock_locked = FALSE;
 285         boolean_t did_rele = FALSE;
 286         boolean_t have_usecount = FALSE;
 287
 288         AUDIT_ARG(fflags, uap->flags);
 289
 290         is_64bit = proc_is64bit(p);
 291
 292         /*
 293          * Get vnode to be covered
 294          */
 295         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT,
 296                    UIO_USERSPACE, uap->path, ctx);
 297         error = namei(&nd);
 298         if (error)
 299                 return (error);
 300         vp = nd.ni_vp;
 301         pvp = nd.ni_dvp;
 302
 303         if ((vp->v_flag & VROOT) &&
 304                 (vp->v_mount->mnt_flag & MNT_ROOTFS))
 305                         uap->flags |= MNT_UPDATE;
 306
 307         error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
 308         if (error)
 309                 goto out1;
 310
 311 #ifdef CONFIG_IMGSRC_ACCESS
 312         if (uap->flags == MNT_IMGSRC) {
 313                 error = relocate_imageboot_source(vp, &nd.ni_cnd, fstypename, ctx, is_64bit, fsmountargs);
 314                 vnode_put(pvp);
 315                 vnode_put(vp);
 316                 return error;
 317         }
 318 #endif /* CONFIG_IMGSRC_ACCESS */
 319
 320         if (uap->flags & MNT_UPDATE) {
 321                 if ((vp->v_flag & VROOT) == 0) {
 322                         error = EINVAL;
 323                         goto out1;
 324                 }
 325                 mp = vp->v_mount;
 326
 327                 /* unmount in progress return error */
 328                 mount_lock_spin(mp);
 329                 if (mp->mnt_lflag & MNT_LUNMOUNT) {
 330                         mount_unlock(mp);
 331                         error = EBUSY;
 332                         goto out1;
 333                 }
 334                 mount_unlock(mp);
 335                 lck_rw_lock_exclusive(&mp->mnt_rwlock);
 336                 is_rwlock_locked = TRUE;
 337                 /*
 338                  * We only allow the filesystem to be reloaded if it
 339                  * is currently mounted read-only.
 340                  */
 341                 if ((uap->flags & MNT_RELOAD) &&
 342                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
 343                         error = ENOTSUP;
 344                         goto out1;
 345                 }
 346
 347 #ifdef CONFIG_IMGSRC_ACCESS
 348                 /* Can't downgrade the backer of the root FS */
 349                 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
 350                         (!vfs_isrdonly(mp)) && (uap->flags & MNT_RDONLY))
 351                 {
 352                         error = ENOTSUP;
 353                         goto out1;
 354                 }
 355 #endif /* CONFIG_IMGSRC_ACCESS */
 356
 357                 /*
 358                  * Only root, or the user that did the original mount is
 359                  * permitted to update it.
 360                  */
 361                 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 362                     (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
 363                         goto out1;
 364                 }
 365 #if CONFIG_MACF
 366                 error = mac_mount_check_remount(ctx, mp);
 367                 if (error != 0) {
 368                         lck_rw_done(&mp->mnt_rwlock);
 369                         goto out1;
 370                 }
 371 #endif
 372                 /*
 373                  * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
 374                  * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
 375                  */
 376                 if (suser(vfs_context_ucred(ctx), NULL)) {
 377                         uap->flags |= MNT_NOSUID | MNT_NODEV;
 378                         if (mp->mnt_flag & MNT_NOEXEC)
 379                                 uap->flags |= MNT_NOEXEC;
 380                 }
 381                 flag = mp->mnt_flag;
 382
 383                 mp->mnt_flag |=
 384                     uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
 385
 386                 vfsp = mp->mnt_vtable;
 387                 goto update;
 388         }
 389         /*
 390          * If the user is not root, ensure that they own the directory
 391          * onto which we are attempting to mount.
 392          */
 393         VATTR_INIT(&va);
 394         VATTR_WANTED(&va, va_uid);
 395         if ((error = vnode_getattr(vp, &va, ctx)) ||
 396             (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 397              (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))) {
 398                 goto out1;
 399         }
 400         /*
 401          * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
 402          * MNT_NOEXEC if mount point is already MNT_NOEXEC.
 403          */
 404         if (suser(vfs_context_ucred(ctx), NULL)) {
 405                 uap->flags |= MNT_NOSUID | MNT_NODEV;
 406                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
 407                         uap->flags |= MNT_NOEXEC;
 408         }
 409         if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
 410                 goto out1;
 411
 412         if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
 413                 goto out1;
 414
 415         if (vp->v_type != VDIR) {
 416                 error = ENOTDIR;
 417                 goto out1;
 418         }
 419
 420         /* XXXAUDIT: Should we capture the type on the error path as well? */
 421         AUDIT_ARG(text, fstypename);
 422         mount_list_lock();
 423         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 424                 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
 425                         vfsp->vfc_refcount++;
 426                         vfsp_ref = TRUE;
 427                         break;
 428                 }
 429         mount_list_unlock();
 430         if (vfsp == NULL) {
 431                 error = ENODEV;
 432                 goto out1;
 433         }
 434 #if CONFIG_MACF
 435         error = mac_mount_check_mount(ctx, vp,
 436             &nd.ni_cnd, vfsp->vfc_name);
 437         if (error != 0)
 438                 goto out1;
 439 #endif
 440         if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
 441                 error = EBUSY;
 442                 goto out1;
 443         }
 444         vnode_lock_spin(vp);
 445         SET(vp->v_flag, VMOUNT);
 446         vnode_unlock(vp);
 447
 448         /*
 449          * Allocate and initialize the filesystem.
 450          */
 451         MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
 452                 M_MOUNT, M_WAITOK);
 453         bzero((char *)mp, (u_int32_t)sizeof(struct mount));
 454         mntalloc = 1;
 455
 456         /* Initialize the default IO constraints */
 457         mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 458         mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 459         mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
 460         mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
 461         mp->mnt_devblocksize = DEV_BSIZE;
 462         mp->mnt_alignmentmask = PAGE_MASK;
 463         mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
 464         mp->mnt_ioscale = 1;
 465         mp->mnt_ioflags = 0;
 466         mp->mnt_realrootvp = NULLVP;
 467         mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
 468
 469         TAILQ_INIT(&mp->mnt_vnodelist);
 470         TAILQ_INIT(&mp->mnt_workerqueue);
 471         TAILQ_INIT(&mp->mnt_newvnodes);
 472         mount_lock_init(mp);
 473         lck_rw_lock_exclusive(&mp->mnt_rwlock);
 474         is_rwlock_locked = TRUE;
 475         mp->mnt_op = vfsp->vfc_vfsops;
 476         mp->mnt_vtable = vfsp;
 477         //mp->mnt_stat.f_type = vfsp->vfc_typenum;
 478         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 479         strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
 480         strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
 481         mp->mnt_vnodecovered = vp;
 482         mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
 483         mp->mnt_devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
 484
 485         /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
 486         vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
 487
 488 update:
 489         /*
 490          * Set the mount level flags.
 491          */
 492         if (uap->flags & MNT_RDONLY)
 493                 mp->mnt_flag |= MNT_RDONLY;
 494         else if (mp->mnt_flag & MNT_RDONLY)
 495                 mp->mnt_kern_flag |= MNTK_WANTRDWR;
 496
 497         mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 498                           MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 499                           MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 500                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE | MNT_CPROTECT );
 501
 502         mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 503                                       MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
 504                                       MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
 505                                           MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE | MNT_CPROTECT );
 506
 507 #if CONFIG_MACF
 508         if (uap->flags & MNT_MULTILABEL) {
 509                 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
 510                         error = EINVAL;
 511                         goto out1;
 512                 }
 513                 mp->mnt_flag |= MNT_MULTILABEL;
 514         }
 515 #endif
 516
 517         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
 518                 if (is_64bit) {
 519                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
 520                                 goto out1;
 521                         fsmountargs += sizeof(devpath);
 522                 } else {
 523                         user32_addr_t tmp;
 524                         if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
 525                                 goto out1;
 526                         /* munge into LP64 addr */
 527                         devpath = CAST_USER_ADDR_T(tmp);
 528                         fsmountargs += sizeof(tmp);
 529                 }
 530
 531                 /* if it is not update and device name needs to be parsed */
 532                 if ((devpath)) {
 533                         NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
 534                         if ( (error = namei(&nd1)) )
 535                                 goto out1;
 536
 537                         strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN);
 538                         devvp = nd1.ni_vp;
 539
 540                         nameidone(&nd1);
 541
 542                         if (devvp->v_type != VBLK) {
 543                                 error = ENOTBLK;
 544                                 goto out2;
 545                         }
 546                         if (major(devvp->v_rdev) >= nblkdev) {
 547                                 error = ENXIO;
 548                                 goto out2;
 549                         }
 550                         /*
 551                         * If mount by non-root, then verify that user has necessary
 552                         * permissions on the device.
 553                         */
 554                         if (suser(vfs_context_ucred(ctx), NULL) != 0) {
 555                                 accessmode = KAUTH_VNODE_READ_DATA;
 556                                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 557                                         accessmode |= KAUTH_VNODE_WRITE_DATA;
 558                                 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
 559                                         goto out2;
 560                         }
 561                 }
 562                 if (devpath && ((uap->flags & MNT_UPDATE) == 0)) {
 563                         if ( (error = vnode_ref(devvp)) )
 564                                 goto out2;
 565                         /*
 566                         * Disallow multiple mounts of the same device.
 567                         * Disallow mounting of a device that is currently in use
 568                         * (except for root, which might share swap device for miniroot).
 569                         * Flush out any old buffers remaining from a previous use.
 570                         */
 571                         if ( (error = vfs_mountedon(devvp)) )
 572                                 goto out3;
 573
 574                         if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
 575                                 error = EBUSY;
 576                                 goto out3;
 577                         }
 578                         if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
 579                                 error = ENOTBLK;
 580                                 goto out3;
 581                         }
 582                         if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
 583                                 goto out3;
 584
 585                         ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 586 #if CONFIG_MACF
 587                         error = mac_vnode_check_open(ctx,
 588                             devvp,
 589                             ronly ? FREAD : FREAD|FWRITE);
 590                         if (error)
 591                                 goto out3;
 592 #endif /* MAC */
 593                         if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
 594                                 goto out3;
 595
 596                         mp->mnt_devvp = devvp;
 597                         device_vnode = devvp;
 598                 } else {
 599                         if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
 600                                 dev_t dev;
 601                                 int maj;
 602                                 /*
 603                                  * If upgrade to read-write by non-root, then verify
 604                                  * that user has necessary permissions on the device.
 605                                  */
 606                                 device_vnode = mp->mnt_devvp;
 607
 608                                 if (device_vnode) {
 609                                         vnode_getalways(device_vnode);
 610
 611                                         if (suser(vfs_context_ucred(ctx), NULL)) {
 612                                                 if ((error = vnode_authorize(device_vnode, NULL,
 613                                                                                 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) {
 614                                                         vnode_put(device_vnode);
 615                                                         goto out2;
 616                                                 }
 617                                         }
 618
 619                                         /* Tell the device that we're upgrading */
 620                                         dev = (dev_t)device_vnode->v_rdev;
 621                                         maj = major(dev);
 622
 623                                         if ((u_int)maj >= (u_int)nblkdev)
 624                                                 panic("Volume mounted on a device with invalid major number.\n");
 625
 626                                         error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
 627
 628                                         vnode_put(device_vnode);
 629                                         if (error != 0) {
 630                                                 goto out2;
 631                                         }
 632                                 }
 633                         }
 634                         device_vnode = NULLVP;
 635                 }
 636         }
 637 #if CONFIG_MACF
 638         if ((uap->flags & MNT_UPDATE) == 0) {
 639                 mac_mount_label_init(mp);
 640                 mac_mount_label_associate(ctx, mp);
 641         }
 642         if (uap->mac_p != USER_ADDR_NULL) {
 643                 struct user_mac mac;
 644                 char *labelstr = NULL;
 645                 size_t ulen = 0;
 646
 647                 if ((uap->flags & MNT_UPDATE) != 0) {
 648                         error = mac_mount_check_label_update(
 649                             ctx, mp);
 650                         if (error != 0)
 651                                 goto out3;
 652                 }
 653                 if (is_64bit) {
 654                         error = copyin(uap->mac_p, &mac, sizeof(mac));
 655                 } else {
 656                         struct mac mac32;
 657                         error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 658                         mac.m_buflen = mac32.m_buflen;
 659                         mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
 660                 }
 661                 if (error != 0)
 662                         goto out3;
 663                 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
 664                     (mac.m_buflen < 2)) {
 665                         error = EINVAL;
 666                         goto out3;
 667                 }
 668                 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
 669                 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
 670                 if (error != 0) {
 671                         FREE(labelstr, M_MACTEMP);
 672                         goto out3;
 673                 }
 674                 AUDIT_ARG(mac_string, labelstr);
 675                 error = mac_mount_label_internalize(mp->mnt_mntlabel, labelstr);
 676                 FREE(labelstr, M_MACTEMP);
 677                 if (error != 0)
 678                         goto out3;
 679         }
 680 #endif
 681         if (device_vnode != NULL) {
 682                 VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&mp->mnt_devbsdunit, 0, NULL);
 683                 mp->mnt_devbsdunit %= LOWPRI_MAX_NUM_DEV;
 684         }
 685
 686         /*
 687          * Mount the filesystem.
 688          */
 689         error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
 690
 691         if (uap->flags & MNT_UPDATE) {
 692                 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 693                         mp->mnt_flag &= ~MNT_RDONLY;
 694                 mp->mnt_flag &=~
 695                     (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
 696                 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 697                 if (error)
 698                         mp->mnt_flag = flag;
 699                 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
 700                 lck_rw_done(&mp->mnt_rwlock);
 701                 is_rwlock_locked = FALSE;
 702                 if (!error)
 703                         enablequotas(mp, ctx);
 704                 goto out2;
 705         }
 706         /*
 707          * Put the new filesystem on the mount list after root.
 708          */
 709         if (error == 0) {
 710                 struct vfs_attr vfsattr;
 711 #if CONFIG_MACF
 712                 if (vfs_flags(mp) & MNT_MULTILABEL) {
 713                         error = VFS_ROOT(mp, &rvp, ctx);
 714                         if (error) {
 715                                 printf("%s() VFS_ROOT returned %d\n", __func__, error);
 716                                 goto out3;
 717                         }
 718                         error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
 719                         /*
 720                          * drop reference provided by VFS_ROOT
 721                          */
 722                         vnode_put(rvp);
 723
 724                         if (error)
 725                                 goto out3;
 726                 }
 727 #endif  /* MAC */
 728
 729                 vnode_lock_spin(vp);
 730                 CLR(vp->v_flag, VMOUNT);
 731                 vp->v_mountedhere = mp;
 732                 vnode_unlock(vp);
 733
 734                 /*
 735                  * taking the name_cache_lock exclusively will
 736                  * insure that everyone is out of the fast path who
 737                  * might be trying to use a now stale copy of
 738                  * vp->v_mountedhere->mnt_realrootvp
 739                  * bumping mount_generation causes the cached values
 740                  * to be invalidated
 741                  */
 742                 name_cache_lock();
 743                 mount_generation++;
 744                 name_cache_unlock();
 745
 746                 error = vnode_ref(vp);
 747                 if (error != 0) {
 748                         goto out4;
 749                 }
 750
 751                 have_usecount = TRUE;
 752
 753                 error = checkdirs(vp, ctx);
 754                 if (error != 0)  {
 755                         /* Unmount the filesystem as cdir/rdirs cannot be updated */
 756                         goto out4;
 757                 }
 758                 /*
 759                  * there is no cleanup code here so I have made it void
 760                  * we need to revisit this
 761                  */
 762                 (void)VFS_START(mp, 0, ctx);
 763
 764                 error = mount_list_add(mp);
 765                 if (error != 0) {
 766                         goto out4;
 767                 }
 768
 769                 lck_rw_done(&mp->mnt_rwlock);
 770                 is_rwlock_locked = FALSE;
 771
 772                 /* Check if this mounted file system supports EAs or named streams. */
 773                 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
 774                 VFSATTR_INIT(&vfsattr);
 775                 VFSATTR_WANTED(&vfsattr, f_capabilities);
 776                 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
 777                     vfs_getattr(mp, &vfsattr, ctx) == 0 &&
 778                     VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
 779                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
 780                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
 781                                 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 782                         }
 783 #if NAMEDSTREAMS
 784                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
 785                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
 786                                 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
 787                         }
 788 #endif
 789                         /* Check if this file system supports path from id lookups. */
 790                         if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
 791                             (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
 792                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 793                         } else if (mp->mnt_flag & MNT_DOVOLFS) {
 794                                 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
 795                                 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
 796                         }
 797                 }
 798                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
 799                         mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
 800                 }
 801                 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
 802                         mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
 803                 }
 804                 /* increment the operations count */
 805                 OSAddAtomic(1, &vfs_nummntops);
 806                 enablequotas(mp, ctx);
 807
 808                 if (device_vnode) {
 809                         device_vnode->v_specflags |= SI_MOUNTEDON;
 810
 811                         /*
 812                          *   cache the IO attributes for the underlying physical media...
 813                          *   an error return indicates the underlying driver doesn't
 814                          *   support all the queries necessary... however, reasonable
 815                          *   defaults will have been set, so no reason to bail or care
 816                          */
 817                         vfs_init_io_attributes(device_vnode, mp);
 818                 }
 819
 820                 /* Now that mount is setup, notify the listeners */
 821                 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
 822         } else {
 823                 vnode_lock_spin(vp);
 824                 CLR(vp->v_flag, VMOUNT);
 825                 vnode_unlock(vp);
 826                 mount_list_lock();
 827                 mp->mnt_vtable->vfc_refcount--;
 828                 mount_list_unlock();
 829
 830                 if (device_vnode ) {
 831                         vnode_rele(device_vnode);
 832                         VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
 833                 }
 834                 lck_rw_done(&mp->mnt_rwlock);
 835                 is_rwlock_locked = FALSE;
 836                 mount_lock_destroy(mp);
 837 #if CONFIG_MACF
 838                 mac_mount_label_destroy(mp);
 839 #endif
 840                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 841         }
 842         nameidone(&nd);
 843
 844         /*
 845          * drop I/O count on covered 'vp' and
 846          * on the device vp if there was one
 847          */
 848         if (devpath && devvp)
 849                 vnode_put(devvp);
 850         vnode_put(vp);
 851
 852         /* Note that we've changed something in the parent directory */
 853         post_event_if_success(pvp, error, NOTE_WRITE);
 854         vnode_put(pvp);
 855
 856         return(error);
 857
 858 out4:
 859         (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
 860         if (device_vnode != NULLVP) {
 861                 vnode_rele(device_vnode);
 862                 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
 863                        ctx);
 864                 did_rele = TRUE;
 865         }
 866         vnode_lock_spin(vp);
 867         vp->v_mountedhere = (mount_t) 0;
 868         vnode_unlock(vp);
 869
 870         if (have_usecount) {
 871                 vnode_rele(vp);
 872         }
 873 out3:
 874         if (devpath && ((uap->flags & MNT_UPDATE) == 0) && (!did_rele))
 875                 vnode_rele(devvp);
 876 out2:
 877         if (devpath && devvp)
 878                 vnode_put(devvp);
 879 out1:
 880         /* Release mnt_rwlock only when it was taken */
 881         if (is_rwlock_locked == TRUE) {
 882                 lck_rw_done(&mp->mnt_rwlock);
 883         }
 884         if (mntalloc) {
 885 #if CONFIG_MACF
 886                 mac_mount_label_destroy(mp);
 887 #endif
 888                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 889         }
 890
 891         if (vfsp_ref) {
 892                 mount_list_lock();
 893                 vfsp->vfc_refcount--;
 894                 mount_list_unlock();
 895         }
 896         vnode_put(vp);
 897         vnode_put(pvp);
 898         nameidone(&nd);
 899
 900         return(error);
 901 }
 902
 903 #ifdef CONFIG_IMGSRC_ACCESS
 904 /*
 905  * Flush in-core data, check for competing mount attempts,
 906  * and set VMOUNT
 907  */
 908 static int
 909 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname)
 910 {
 911         struct vnode_attr va;
 912         int error;
 913
 914         /*
 915          * If the user is not root, ensure that they own the directory
 916          * onto which we are attempting to mount.
 917          */
 918         VATTR_INIT(&va);
 919         VATTR_WANTED(&va, va_uid);
 920         if ((error = vnode_getattr(vp, &va, ctx)) ||
 921             (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
 922              (!vfs_context_issuser(ctx)))) {
 923                 error = EPERM;
 924                 goto out;
 925         }
 926
 927         if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
 928                 goto out;
 929
 930         if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
 931                 goto out;
 932
 933         if (vp->v_type != VDIR) {
 934                 error = ENOTDIR;
 935                 goto out;
 936         }
 937
 938         if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
 939                 error = EBUSY;
 940                 goto out;
 941         }
 942
 943 #if CONFIG_MACF
 944         error = mac_mount_check_mount(ctx, vp,
 945             cnp, fsname);
 946         if (error != 0)
 947                 goto out;
 948 #endif
 949
 950         vnode_lock_spin(vp);
 951         SET(vp->v_flag, VMOUNT);
 952         vnode_unlock(vp);
 953
 954 out:
 955         return error;
 956 }
 957
 958 static int
 959 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
 960 {
 961         struct nameidata nd;
 962         vnode_t vp;
 963         mode_t accessmode;
 964         int error;
 965
 966         NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
 967         if ( (error = namei(&nd)) )
 968                 return error;
 969
 970         strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
 971         vp = nd.ni_vp;
 972         nameidone(&nd);
 973
 974         if (vp->v_type != VBLK) {
 975                 error = ENOTBLK;
 976                 goto out;
 977         }
 978         if (major(vp->v_rdev) >= nblkdev) {
 979                 error = ENXIO;
 980                 goto out;
 981         }
 982         /*
 983          * If mount by non-root, then verify that user has necessary
 984          * permissions on the device.
 985          */
 986         if (!vfs_context_issuser(ctx)) {
 987                 accessmode = KAUTH_VNODE_READ_DATA;
 988                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 989                         accessmode |= KAUTH_VNODE_WRITE_DATA;
 990                 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0)
 991                         goto out;
 992         }
 993
 994         *devvpp = vp;
 995 out:
 996         if (error) {
 997                 vnode_put(vp);
 998         }
 999
1000         return error;
1001 }
1002
1003 /*
1004  * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1005  * and call checkdirs()
1006  */
1007 static int
1008 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1009 {
1010         int error;
1011
1012         mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1013
1014         vnode_lock_spin(vp);
1015         CLR(vp->v_flag, VMOUNT);
1016         vp->v_mountedhere = mp;
1017         vnode_unlock(vp);
1018
1019         /*
1020          * taking the name_cache_lock exclusively will
1021          * insure that everyone is out of the fast path who
1022          * might be trying to use a now stale copy of
1023          * vp->v_mountedhere->mnt_realrootvp
1024          * bumping mount_generation causes the cached values
1025          * to be invalidated
1026          */
1027         name_cache_lock();
1028         mount_generation++;
1029         name_cache_unlock();
1030
1031         error = vnode_ref(vp);
1032         if (error != 0) {
1033                 goto out;
1034         }
1035
1036         error = checkdirs(vp, ctx);
1037         if (error != 0)  {
1038                 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1039                 vnode_rele(vp);
1040                 goto out;
1041         }
1042
1043 out:
1044         if (error != 0) {
1045                 mp->mnt_vnodecovered = NULLVP;
1046         }
1047         return error;
1048 }
1049
1050 static void
1051 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1052 {
1053         vnode_rele(vp);
1054         vnode_lock_spin(vp);
1055         vp->v_mountedhere = (mount_t)NULL;
1056         vnode_unlock(vp);
1057
1058         mp->mnt_vnodecovered = NULLVP;
1059 }
1060
1061 static int
1062 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1063 {
1064         int error;
1065
1066         /* unmount in progress return error */
1067         mount_lock_spin(mp);
1068         if (mp->mnt_lflag & MNT_LUNMOUNT) {
1069                 mount_unlock(mp);
1070                 return EBUSY;
1071         }
1072         mount_unlock(mp);
1073         lck_rw_lock_exclusive(&mp->mnt_rwlock);
1074
1075         /*
1076          * We only allow the filesystem to be reloaded if it
1077          * is currently mounted read-only.
1078          */
1079         if ((flags & MNT_RELOAD) &&
1080                         ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1081                 error = ENOTSUP;
1082                 goto out;
1083         }
1084
1085         /*
1086          * Only root, or the user that did the original mount is
1087          * permitted to update it.
1088          */
1089         if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1090                         (!vfs_context_issuser(ctx))) {
1091                 error = EPERM;
1092                 goto out;
1093         }
1094 #if CONFIG_MACF
1095         error = mac_mount_check_remount(ctx, mp);
1096         if (error != 0) {
1097                 goto out;
1098         }
1099 #endif
1100
1101 out:
1102         if (error) {
1103                 lck_rw_done(&mp->mnt_rwlock);
1104         }
1105
1106         return error;
1107 }
1108
1109 static void
1110 mount_end_update(mount_t mp)
1111 {
1112         lck_rw_done(&mp->mnt_rwlock);
1113 }
1114
1115 static int
1116 relocate_imageboot_source(vnode_t vp, struct componentname *cnp,
1117                 const char *fsname, vfs_context_t ctx,
1118                 boolean_t is64bit, user_addr_t fsmountargs)
1119 {
1120         int error;
1121         mount_t mp;
1122         boolean_t placed = FALSE;
1123         vnode_t devvp;
1124         struct vfstable *vfsp;
1125         user_addr_t devpath;
1126         char *old_mntonname;
1127
1128         /* If we didn't imageboot, nothing to move */
1129         if (imgsrc_rootvnode == NULLVP) {
1130                 return EINVAL;
1131         }
1132
1133         /* Only root can do this */
1134         if (!vfs_context_issuser(ctx)) {
1135                 return EPERM;
1136         }
1137
1138         error = vnode_get(imgsrc_rootvnode);
1139         if (error != 0) {
1140                 return error;
1141         }
1142
1143         MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1144
1145         /* Can only move once */
1146         mp = vnode_mount(imgsrc_rootvnode);
1147         if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1148                 error = EBUSY;
1149                 goto out0;
1150         }
1151
1152         /* Get exclusive rwlock on mount, authorize update on mp */
1153         error = mount_begin_update(mp , ctx, 0);
1154         if (error != 0) {
1155                 goto out0;
1156         }
1157
1158         /*
1159          * It can only be moved once.  Flag is set under the rwlock,
1160          * so we're now safe to proceed.
1161          */
1162         if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1163                 goto out1;
1164         }
1165
1166         /* Mark covered vnode as mount in progress, authorize placing mount on top */
1167         error = prepare_coveredvp(vp, ctx, cnp, fsname);
1168         if (error != 0) {
1169                 goto out1;
1170         }
1171
1172         /* Sanity check the name caller has provided */
1173         vfsp = mp->mnt_vtable;
1174         if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1175                 error = EINVAL;
1176                 goto out2;
1177         }
1178
1179         /* Check the device vnode and update mount-from name, for local filesystems */
1180         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1181                 if (is64bit) {
1182                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1183                                 goto out2;
1184                         fsmountargs += sizeof(devpath);
1185                 } else {
1186                         user32_addr_t tmp;
1187                         if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1188                                 goto out2;
1189                         /* munge into LP64 addr */
1190                         devpath = CAST_USER_ADDR_T(tmp);
1191                         fsmountargs += sizeof(tmp);
1192                 }
1193
1194                 if (devpath != USER_ADDR_NULL) {
1195                         error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1196                         if (error) {
1197                                 goto out2;
1198                         }
1199
1200                         vnode_put(devvp);
1201                 }
1202         }
1203
1204         /*
1205          * Place mp on top of vnode, ref the vnode,  call checkdirs(),
1206          * and increment the name cache's mount generation
1207          */
1208         error = place_mount_and_checkdirs(mp, vp, ctx);
1209         if (error != 0) {
1210                 goto out2;
1211         }
1212
1213         placed = TRUE;
1214
1215         strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1216         strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1217
1218         /* Forbid future moves */
1219         mount_lock(mp);
1220         mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1221         mount_unlock(mp);
1222
1223         /* Finally, add to mount list, completely ready to go */
1224         error = mount_list_add(mp);
1225         if (error != 0) {
1226                 goto out3;
1227         }
1228
1229         mount_end_update(mp);
1230         vnode_put(imgsrc_rootvnode);
1231         FREE(old_mntonname, M_TEMP);
1232
1233         return 0;
1234 out3:
1235         strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1236
1237         mount_lock(mp);
1238         mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1239         mount_unlock(mp);
1240
1241 out2:
1242         /*
1243          * Placing the mp on the vnode clears VMOUNT,
1244          * so cleanup is different after that point
1245          */
1246         if (placed) {
1247                 /* Rele the vp, clear VMOUNT and v_mountedhere */
1248                 undo_place_on_covered_vp(mp, vp);
1249         } else {
1250                 vnode_lock_spin(vp);
1251                 CLR(vp->v_flag, VMOUNT);
1252                 vnode_unlock(vp);
1253         }
1254 out1:
1255         mount_end_update(mp);
1256
1257 out0:
1258         vnode_put(imgsrc_rootvnode);
1259         FREE(old_mntonname, M_TEMP);
1260         return error;
1261 }
1262
1263 #endif /* CONFIG_IMGSRC_ACCESS */
1264
1265 void
1266 enablequotas(struct mount *mp, vfs_context_t ctx)
1267 {
1268         struct nameidata qnd;
1269         int type;
1270         char qfpath[MAXPATHLEN];
1271         const char *qfname = QUOTAFILENAME;
1272         const char *qfopsname = QUOTAOPSNAME;
1273         const char *qfextension[] = INITQFNAMES;
1274
1275         /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
1276         if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1277                 return;
1278         }
1279         /*
1280          * Enable filesystem disk quotas if necessary.
1281          * We ignore errors as this should not interfere with final mount
1282          */
1283         for (type=0; type < MAXQUOTAS; type++) {
1284                 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1285                 NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(qfpath), ctx);
1286                 if (namei(&qnd) != 0)
1287                         continue;           /* option file to trigger quotas is not present */
1288                 vnode_put(qnd.ni_vp);
1289                 nameidone(&qnd);
1290                 snprintf(qfpath, sizeof(qfpath),  "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1291
1292                 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1293         }
1294         return;
1295 }
1296
1297
1298 static int
1299 checkdirs_callback(proc_t p, void * arg)
1300 {
1301         struct cdirargs * cdrp = (struct cdirargs * )arg;
1302         vnode_t olddp = cdrp->olddp;
1303         vnode_t newdp = cdrp->newdp;
1304         struct filedesc *fdp;
1305         vnode_t tvp;
1306         vnode_t fdp_cvp;
1307         vnode_t fdp_rvp;
1308         int cdir_changed = 0;
1309         int rdir_changed = 0;
1310
1311         /*
1312          * XXX Also needs to iterate each thread in the process to see if it
1313          * XXX is using a per-thread current working directory, and, if so,
1314          * XXX update that as well.
1315          */
1316
1317         proc_fdlock(p);
1318         fdp = p->p_fd;
1319         if (fdp == (struct filedesc *)0) {
1320                 proc_fdunlock(p);
1321                 return(PROC_RETURNED);
1322         }
1323         fdp_cvp = fdp->fd_cdir;
1324         fdp_rvp = fdp->fd_rdir;
1325         proc_fdunlock(p);
1326
1327         if (fdp_cvp == olddp) {
1328                 vnode_ref(newdp);
1329                 tvp = fdp->fd_cdir;
1330                 fdp_cvp = newdp;
1331                 cdir_changed = 1;
1332                 vnode_rele(tvp);
1333         }
1334         if (fdp_rvp == olddp) {
1335                 vnode_ref(newdp);
1336                 tvp = fdp->fd_rdir;
1337                 fdp_rvp = newdp;
1338                 rdir_changed = 1;
1339                 vnode_rele(tvp);
1340         }
1341         if (cdir_changed || rdir_changed) {
1342                 proc_fdlock(p);
1343                 fdp->fd_cdir = fdp_cvp;
1344                 fdp->fd_rdir = fdp_rvp;
1345                 proc_fdunlock(p);
1346         }
1347         return(PROC_RETURNED);
1348 }
1349
1350
1351
1352 /*
1353  * Scan all active processes to see if any of them have a current
1354  * or root directory onto which the new filesystem has just been
1355  * mounted. If so, replace them with the new mount point.
1356  */
1357 static int
1358 checkdirs(vnode_t olddp, vfs_context_t ctx)
1359 {
1360         vnode_t newdp;
1361         vnode_t tvp;
1362         int err;
1363         struct cdirargs cdr;
1364         struct uthread * uth = get_bsdthread_info(current_thread());
1365
1366         if (olddp->v_usecount == 1)
1367                 return(0);
1368         if (uth != (struct uthread *)0)
1369                 uth->uu_notrigger = 1;
1370         err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1371         if (uth != (struct uthread *)0)
1372                 uth->uu_notrigger = 0;
1373
1374         if (err != 0) {
1375 #if DIAGNOSTIC
1376                 panic("mount: lost mount: error %d", err);
1377 #endif
1378                 return(err);
1379         }
1380
1381         cdr.olddp = olddp;
1382         cdr.newdp = newdp;
1383         /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1384         proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1385
1386         if (rootvnode == olddp) {
1387                 vnode_ref(newdp);
1388                 tvp = rootvnode;
1389                 rootvnode = newdp;
1390                 vnode_rele(tvp);
1391         }
1392
1393         vnode_put(newdp);
1394         return(0);
1395 }
1396
1397 /*
1398  * Unmount a file system.
1399  *
1400  * Note: unmount takes a path to the vnode mounted on as argument,
1401  * not special file (as before).
1402  */
1403 /* ARGSUSED */
1404 int
1405 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1406 {
1407         vnode_t vp;
1408         struct mount *mp;
1409         int error;
1410         struct nameidata nd;
1411         vfs_context_t ctx = vfs_context_current();
1412
1413         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1414                 UIO_USERSPACE, uap->path, ctx);
1415         error = namei(&nd);
1416         if (error)
1417                 return (error);
1418         vp = nd.ni_vp;
1419         mp = vp->v_mount;
1420         nameidone(&nd);
1421
1422 #if CONFIG_MACF
1423         error = mac_mount_check_umount(ctx, mp);
1424         if (error != 0) {
1425                 vnode_put(vp);
1426                 return (error);
1427         }
1428 #endif
1429         /*
1430          * Must be the root of the filesystem
1431          */
1432         if ((vp->v_flag & VROOT) == 0) {
1433                 vnode_put(vp);
1434                 return (EINVAL);
1435         }
1436         mount_ref(mp, 0);
1437         vnode_put(vp);
1438         /* safedounmount consumes the mount ref */
1439         return (safedounmount(mp, uap->flags, ctx));
1440 }
1441
1442 int
1443 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1444 {
1445         mount_t mp;
1446
1447         mp = mount_list_lookupby_fsid(fsid, 0, 1);
1448         if (mp == (mount_t)0) {
1449                 return(ENOENT);
1450         }
1451         mount_ref(mp, 0);
1452         mount_iterdrop(mp);
1453         /* safedounmount consumes the mount ref */
1454         return(safedounmount(mp, flags, ctx));
1455 }
1456
1457
1458 /*
1459  * The mount struct comes with a mount ref which will be consumed.
1460  * Do the actual file system unmount, prevent some common foot shooting.
1461  */
1462 int
1463 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1464 {
1465         int error;
1466         proc_t p = vfs_context_proc(ctx);
1467
1468         /*
1469          * Only root, or the user that did the original mount is
1470          * permitted to unmount this filesystem.
1471          */
1472         if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1473             (error = suser(kauth_cred_get(), &p->p_acflag)))
1474                 goto out;
1475
1476         /*
1477          * Don't allow unmounting the root file system.
1478          */
1479         if (mp->mnt_flag & MNT_ROOTFS) {
1480                 error = EBUSY; /* the root is always busy */
1481                 goto out;
1482         }
1483
1484 #ifdef CONFIG_IMGSRC_ACCESS
1485         if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1486                 error = EBUSY;
1487                 goto out;
1488         }
1489 #endif /* CONFIG_IMGSRC_ACCESS */
1490
1491         return (dounmount(mp, flags, 1, ctx));
1492
1493 out:
1494         mount_drop(mp, 0);
1495         return(error);
1496 }
1497
1498 /*
1499  * Do the actual file system unmount.
1500  */
1501 int
1502 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1503 {
1504         vnode_t coveredvp = (vnode_t)0;
1505         int error;
1506         int needwakeup = 0;
1507         int forcedunmount = 0;
1508         int lflags = 0;
1509         struct vnode *devvp = NULLVP;
1510
1511         if (flags & MNT_FORCE)
1512                 forcedunmount = 1;
1513         mount_lock(mp);
1514         /* XXX post jaguar fix LK_DRAIN - then clean this up */
1515         if ((flags & MNT_FORCE)) {
1516                 mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
1517                 mp->mnt_lflag |= MNT_LFORCE;
1518         }
1519         if (mp->mnt_lflag & MNT_LUNMOUNT) {
1520                 mp->mnt_lflag |= MNT_LWAIT;
1521                 if(withref != 0)
1522                         mount_drop(mp, 1);
1523                 msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
1524                 /*
1525                  * The prior unmount attempt has probably succeeded.
1526                  * Do not dereference mp here - returning EBUSY is safest.
1527                  */
1528                 return (EBUSY);
1529         }
1530         mp->mnt_kern_flag |= MNTK_UNMOUNT;
1531         mp->mnt_lflag |= MNT_LUNMOUNT;
1532         mp->mnt_flag &=~ MNT_ASYNC;
1533         /*
1534          * anyone currently in the fast path that
1535          * trips over the cached rootvp will be
1536          * dumped out and forced into the slow path
1537          * to regenerate a new cached value
1538          */
1539         mp->mnt_realrootvp = NULLVP;
1540         mount_unlock(mp);
1541
1542         /*
1543          * taking the name_cache_lock exclusively will
1544          * insure that everyone is out of the fast path who
1545          * might be trying to use a now stale copy of
1546          * vp->v_mountedhere->mnt_realrootvp
1547          * bumping mount_generation causes the cached values
1548          * to be invalidated
1549          */
1550         name_cache_lock();
1551         mount_generation++;
1552         name_cache_unlock();
1553
1554
1555         lck_rw_lock_exclusive(&mp->mnt_rwlock);
1556         if (withref != 0)
1557                 mount_drop(mp, 0);
1558 #if CONFIG_FSE
1559         fsevent_unmount(mp);  /* has to come first! */
1560 #endif
1561         error = 0;
1562         if (forcedunmount == 0) {
1563                 ubc_umount(mp); /* release cached vnodes */
1564                 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1565                         error = VFS_SYNC(mp, MNT_WAIT, ctx);
1566                         if (error) {
1567                                 mount_lock(mp);
1568                                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1569                                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1570                                 mp->mnt_lflag &= ~MNT_LFORCE;
1571                                 goto out;
1572                         }
1573                 }
1574         }
1575
1576         if (forcedunmount)
1577                 lflags |= FORCECLOSE;
1578         error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
1579         if ((forcedunmount == 0) && error) {
1580                 mount_lock(mp);
1581                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1582                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1583                 mp->mnt_lflag &= ~MNT_LFORCE;
1584                 goto out;
1585         }
1586
1587         /* make sure there are no one in the mount iterations or lookup */
1588         mount_iterdrain(mp);
1589
1590         error = VFS_UNMOUNT(mp, flags, ctx);
1591         if (error) {
1592                 mount_iterreset(mp);
1593                 mount_lock(mp);
1594                 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1595                 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1596                 mp->mnt_lflag &= ~MNT_LFORCE;
1597                 goto out;
1598         }
1599
1600         /* increment the operations count */
1601         if (!error)
1602                 OSAddAtomic(1, &vfs_nummntops);
1603
1604         if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1605                 /* hold an io reference and drop the usecount before close */
1606                 devvp = mp->mnt_devvp;
1607                 vnode_getalways(devvp);
1608                 vnode_rele(devvp);
1609                 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1610                        ctx);
1611                 vnode_clearmountedon(devvp);
1612                 vnode_put(devvp);
1613         }
1614         lck_rw_done(&mp->mnt_rwlock);
1615         mount_list_remove(mp);
1616         lck_rw_lock_exclusive(&mp->mnt_rwlock);
1617
1618         /* mark the mount point hook in the vp but not drop the ref yet */
1619         if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1620                         vnode_getwithref(coveredvp);
1621                         vnode_lock_spin(coveredvp);
1622                         coveredvp->v_mountedhere = (struct mount *)0;
1623                         vnode_unlock(coveredvp);
1624                         vnode_put(coveredvp);
1625         }
1626
1627         mount_list_lock();
1628         mp->mnt_vtable->vfc_refcount--;
1629         mount_list_unlock();
1630
1631         cache_purgevfs(mp);     /* remove cache entries for this file sys */
1632         vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1633         mount_lock(mp);
1634         mp->mnt_lflag |= MNT_LDEAD;
1635
1636         if (mp->mnt_lflag & MNT_LWAIT) {
1637                 /*
1638                  * do the wakeup here
1639                  * in case we block in mount_refdrain
1640                  * which will drop the mount lock
1641                  * and allow anyone blocked in vfs_busy
1642                  * to wakeup and see the LDEAD state
1643                  */
1644                 mp->mnt_lflag &= ~MNT_LWAIT;
1645                 wakeup((caddr_t)mp);
1646         }
1647         mount_refdrain(mp);
1648 out:
1649         if (mp->mnt_lflag & MNT_LWAIT) {
1650                 mp->mnt_lflag &= ~MNT_LWAIT;
1651                 needwakeup = 1;
1652         }
1653         mount_unlock(mp);
1654         lck_rw_done(&mp->mnt_rwlock);
1655
1656         if (needwakeup)
1657                 wakeup((caddr_t)mp);
1658         if (!error) {
1659                 if ((coveredvp != NULLVP)) {
1660                         vnode_t pvp;
1661
1662                         vnode_getwithref(coveredvp);
1663                         pvp = vnode_getparent(coveredvp);
1664                         vnode_rele(coveredvp);
1665                         vnode_lock_spin(coveredvp);
1666                         if(mp->mnt_crossref == 0) {
1667                                 vnode_unlock(coveredvp);
1668                                 mount_lock_destroy(mp);
1669 #if CONFIG_MACF
1670                                 mac_mount_label_destroy(mp);
1671 #endif
1672                                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1673                         }  else {
1674                                 coveredvp->v_lflag |= VL_MOUNTDEAD;
1675                                 vnode_unlock(coveredvp);
1676                         }
1677                         vnode_put(coveredvp);
1678
1679                         if (pvp) {
1680                                 lock_vnode_and_post(pvp, NOTE_WRITE);
1681                                 vnode_put(pvp);
1682                         }
1683                 } else if (mp->mnt_flag & MNT_ROOTFS) {
1684                                 mount_lock_destroy(mp);
1685 #if CONFIG_MACF
1686                                 mac_mount_label_destroy(mp);
1687 #endif
1688                                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1689                 } else
1690                         panic("dounmount: no coveredvp");
1691         }
1692         return (error);
1693 }
1694
1695 void
1696 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
1697 {
1698                 vnode_lock(dp);
1699                 mp->mnt_crossref--;
1700                 if (mp->mnt_crossref < 0)
1701                         panic("mount cross refs -ve");
1702                 if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) {
1703                         dp->v_lflag &= ~VL_MOUNTDEAD;
1704                         if (need_put)
1705                                 vnode_put_locked(dp);
1706                         vnode_unlock(dp);
1707                         mount_lock_destroy(mp);
1708 #if CONFIG_MACF
1709                         mac_mount_label_destroy(mp);
1710 #endif
1711                         FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1712                         return;
1713                 }
1714                 if (need_put)
1715                         vnode_put_locked(dp);
1716                 vnode_unlock(dp);
1717 }
1718
1719
1720 /*
1721  * Sync each mounted filesystem.
1722  */
1723 #if DIAGNOSTIC
1724 int syncprt = 0;	/* when non-zero, sync() calls vfs_bufstats() before returning */
1725 struct ctldebug debug0 = { "syncprt", &syncprt };	/* pairs the name "syncprt" with the flag above; NOTE(review): presumably a debug-control table entry - confirm */
1726 #endif
1727
1728 int print_vmpage_stat=0;	/* when non-zero, sync() calls vm_countdirtypages() */
1729
1730 static int
1731 sync_callback(mount_t mp, void * arg)
1732 {
1733         int asyncflag;
1734
1735         if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1736                         asyncflag = mp->mnt_flag & MNT_ASYNC;
1737                         mp->mnt_flag &= ~MNT_ASYNC;
1738                         VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current());
1739                         if (asyncflag)
1740                                 mp->mnt_flag |= MNT_ASYNC;
1741         }
1742         return(VFS_RETURNED);
1743 }
1744
1745
1746 #include <kern/clock.h>
1747
1748 clock_sec_t sync_wait_time = 0;	/* filled in by sync() via clock_get_calendar_microtime(); its address is the channel sync() passes to wakeup() */
1749
1750 /* ARGSUSED */
1751 int
1752 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
1753 {
1754         clock_nsec_t nsecs;
1755
1756         vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
1757
1758         {
1759                 static fsid_t fsid = { { 0, 0 } };
1760
1761                 clock_get_calendar_microtime(&sync_wait_time, &nsecs);
1762                 vfs_event_signal(&fsid, VQ_SYNCEVENT, (intptr_t)NULL);
1763                 wakeup((caddr_t)&sync_wait_time);
1764         }
1765
1766         {
1767         if(print_vmpage_stat) {
1768                 vm_countdirtypages();
1769         }
1770         }
1771 #if DIAGNOSTIC
1772         if (syncprt)
1773                 vfs_bufstats();
1774 #endif /* DIAGNOSTIC */
1775         return (0);
1776 }
1777
1778 /*
1779  * Change filesystem quotas.
1780  */
1781 #if QUOTA
1782 static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval);
1783
1784 int
1785 quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval)
1786 {
1787         boolean_t funnel_state;
1788         int error;
1789
1790         funnel_state = thread_funnel_set(kernel_flock, TRUE);
1791         error = quotactl_funneled(p, uap, retval);
1792         thread_funnel_set(kernel_flock, funnel_state);
1793         return(error);
1794 }
1795
1796 static int
1797 quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1798 {
1799         struct mount *mp;
1800         int error, quota_cmd, quota_status;
1801         caddr_t datap;
1802         size_t fnamelen;
1803         struct nameidata nd;
1804         vfs_context_t ctx = vfs_context_current();
1805         struct dqblk my_dqblk;
1806
1807         AUDIT_ARG(uid, uap->uid);
1808         AUDIT_ARG(cmd, uap->cmd);
1809         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1810                 UIO_USERSPACE, uap->path, ctx);
1811         error = namei(&nd);
1812         if (error)
1813                 return (error);
1814         mp = nd.ni_vp->v_mount;
1815         vnode_put(nd.ni_vp);
1816         nameidone(&nd);
1817
1818         /* copyin any data we will need for downstream code */
1819         quota_cmd = uap->cmd >> SUBCMDSHIFT;
1820
1821         switch (quota_cmd) {
1822         case Q_QUOTAON:
1823                 /* uap->arg specifies a file from which to take the quotas */
1824                 fnamelen = MAXPATHLEN;
1825                 datap = kalloc(MAXPATHLEN);
1826                 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
1827                 break;
1828         case Q_GETQUOTA:
1829                 /* uap->arg is a pointer to a dqblk structure. */
1830                 datap = (caddr_t) &my_dqblk;
1831                 break;
1832         case Q_SETQUOTA:
1833         case Q_SETUSE:
1834                 /* uap->arg is a pointer to a dqblk structure. */
1835                 datap = (caddr_t) &my_dqblk;
1836                 if (proc_is64bit(p)) {
1837                         struct user_dqblk       my_dqblk64;
1838                         error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
1839                         if (error == 0) {
1840                                 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
1841                         }
1842                 }
1843                 else {
1844                         error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
1845                 }
1846                 break;
1847         case Q_QUOTASTAT:
1848                 /* uap->arg is a pointer to an integer */
1849                 datap = (caddr_t) &quota_status;
1850                 break;
1851         default:
1852                 datap = NULL;
1853                 break;
1854         } /* switch */
1855
1856         if (error == 0) {
1857                 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
1858         }
1859
1860         switch (quota_cmd) {
1861         case Q_QUOTAON:
1862                 if (datap != NULL)
1863                         kfree(datap, MAXPATHLEN);
1864                 break;
1865         case Q_GETQUOTA:
1866                 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
1867                 if (error == 0) {
1868                         if (proc_is64bit(p)) {
1869                                 struct user_dqblk       my_dqblk64;
1870                                 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
1871                                 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
1872                         }
1873                         else {
1874                                 error = copyout(datap, uap->arg, sizeof (struct dqblk));
1875                         }
1876                 }
1877                 break;
1878         case Q_QUOTASTAT:
1879                 /* uap->arg is a pointer to an integer */
1880                 if (error == 0) {
1881                         error = copyout(datap, uap->arg, sizeof(quota_status));
1882                 }
1883                 break;
1884         default:
1885                 break;
1886         } /* switch */
1887
1888         return (error);
1889 }
1890 #else
1891 int
1892 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
1893 {
1894         return (EOPNOTSUPP);	/* built without QUOTA: every quotactl request is refused */
1895 }
1896 #endif /* QUOTA */
1897
1898 /*
1899  * Get filesystem statistics.
1900  *
1901  * Returns:     0                       Success
1902  *      namei:???
1903  *      vfs_update_vfsstat:???
1904  *      munge_statfs:EFAULT
1905  */
1906 /* ARGSUSED */
1907 int
1908 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1909 {
1910         struct mount *mp;
1911         struct vfsstatfs *sp;
1912         int error;
1913         struct nameidata nd;
1914         vfs_context_t ctx = vfs_context_current();
1915         vnode_t vp;
1916
1917         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1918                 UIO_USERSPACE, uap->path, ctx);
1919         error = namei(&nd);
1920         if (error)
1921                 return (error);
1922         vp = nd.ni_vp;
1923         mp = vp->v_mount;
1924         sp = &mp->mnt_vfsstat;
1925         nameidone(&nd);
1926
1927         error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
1928         vnode_put(vp);
1929         if (error != 0)
1930                 return (error);
1931
1932         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1933         return (error);
1934 }
1935
1936 /*
1937  * Get filesystem statistics.
1938  */
1939 /* ARGSUSED */
1940 int
1941 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1942 {
1943         vnode_t vp;
1944         struct mount *mp;
1945         struct vfsstatfs *sp;
1946         int error;
1947
1948         AUDIT_ARG(fd, uap->fd);
1949
1950         if ( (error = file_vnode(uap->fd, &vp)) )
1951                 return (error);
1952
1953         error = vnode_getwithref(vp);
1954         if (error) {
1955                 file_drop(uap->fd);
1956                 return (error);
1957         }
1958
1959         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1960
1961         mp = vp->v_mount;
1962         if (!mp) {
1963                 error = EBADF;
1964                 goto out;
1965         }
1966         sp = &mp->mnt_vfsstat;
1967         if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
1968                 goto out;
1969         }
1970
1971         error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1972
1973 out:
1974         file_drop(uap->fd);
1975         vnode_put(vp);
1976
1977         return (error);
1978 }
1979
1980 /*
1981  * Common routine to handle copying of statfs64 data to user space
1982  */
1983 static int
1984 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
1985 {
1986         int error;
1987         struct statfs64 sfs;
1988
1989         bzero(&sfs, sizeof(sfs));
1990
1991         sfs.f_bsize = sfsp->f_bsize;
1992         sfs.f_iosize = (int32_t)sfsp->f_iosize;
1993         sfs.f_blocks = sfsp->f_blocks;
1994         sfs.f_bfree = sfsp->f_bfree;
1995         sfs.f_bavail = sfsp->f_bavail;
1996         sfs.f_files = sfsp->f_files;
1997         sfs.f_ffree = sfsp->f_ffree;
1998         sfs.f_fsid = sfsp->f_fsid;
1999         sfs.f_owner = sfsp->f_owner;
2000         sfs.f_type = mp->mnt_vtable->vfc_typenum;
2001         sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2002         sfs.f_fssubtype = sfsp->f_fssubtype;
2003         strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2004         strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2005         strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2006
2007         error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2008
2009         return(error);
2010 }
2011
2012 /*
2013  * Get file system statistics in 64-bit mode
2014  */
2015 int
2016 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2017 {
2018         struct mount *mp;
2019         struct vfsstatfs *sp;
2020         int error;
2021         struct nameidata nd;
2022         vfs_context_t ctxp = vfs_context_current();
2023         vnode_t vp;
2024
2025         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2026                 UIO_USERSPACE, uap->path, ctxp);
2027         error = namei(&nd);
2028         if (error)
2029                 return (error);
2030         vp = nd.ni_vp;
2031         mp = vp->v_mount;
2032         sp = &mp->mnt_vfsstat;
2033         nameidone(&nd);
2034
2035         error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2036         vnode_put(vp);
2037         if (error != 0)
2038                 return (error);
2039
2040         error = statfs64_common(mp, sp, uap->buf);
2041
2042         return (error);
2043 }
2044
2045 /*
2046  * Get file system statistics in 64-bit mode
2047  */
2048 int
2049 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2050 {
2051         struct vnode *vp;
2052         struct mount *mp;
2053         struct vfsstatfs *sp;
2054         int error;
2055
2056         AUDIT_ARG(fd, uap->fd);
2057
2058         if ( (error = file_vnode(uap->fd, &vp)) )
2059                 return (error);
2060
2061         error = vnode_getwithref(vp);
2062         if (error) {
2063                 file_drop(uap->fd);
2064                 return (error);
2065         }
2066
2067         AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2068
2069         mp = vp->v_mount;
2070         if (!mp) {
2071                 error = EBADF;
2072                 goto out;
2073         }
2074         sp = &mp->mnt_vfsstat;
2075         if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2076                 goto out;
2077         }
2078
2079         error = statfs64_common(mp, sp, uap->buf);
2080
2081 out:
2082         file_drop(uap->fd);
2083         vnode_put(vp);
2084
2085         return (error);
2086 }
2087
2088 struct getfsstat_struct {	/* state shared between getfsstat()/getfsstat64() and their vfs_iterate() callbacks */
2089         user_addr_t     sfsp;	/* user address of the next slot to fill; 0 means only count the mounts */
2090         user_addr_t     *mp;	/* next user address to receive a MAC mount label, or NULL when none were supplied */
2091         int             count;	/* mounts counted so far (the callbacks also count mounts that were not copied out) */
2092         int             maxcount;	/* number of entries that fit in the user buffer */
2093         int             flags;	/* caller's MNT_WAIT / MNT_NOWAIT / MNT_DWAIT flags */
2094         int             error;	/* error that stopped the iteration, 0 if none */
2095 };
2096
2097
2098 static int
2099 getfsstat_callback(mount_t mp, void * arg)
2100 {
2101
2102         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2103         struct vfsstatfs *sp;
2104         int error, my_size;
2105         vfs_context_t ctx = vfs_context_current();
2106
2107         if (fstp->sfsp && fstp->count < fstp->maxcount) {
2108                 sp = &mp->mnt_vfsstat;
2109                 /*
2110                  * If MNT_NOWAIT is specified, do not refresh the
2111                  * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2112                  */
2113                 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2114                         (error = vfs_update_vfsstat(mp, ctx,
2115                             VFS_USER_EVENT))) {
2116                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2117                         return(VFS_RETURNED);
2118                 }
2119
2120                 /*
2121                  * Need to handle LP64 version of struct statfs
2122                  */
2123                 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2124                 if (error) {
2125                         fstp->error = error;
2126                         return(VFS_RETURNED_DONE);
2127                 }
2128                 fstp->sfsp += my_size;
2129
2130                 if (fstp->mp) {
2131                         error = mac_mount_label_get(mp, *fstp->mp);
2132                         if (error) {
2133                                 fstp->error = error;
2134                                 return(VFS_RETURNED_DONE);
2135                         }
2136                         fstp->mp++;
2137                 }
2138         }
2139         fstp->count++;
2140         return(VFS_RETURNED);
2141 }
2142
2143 /*
2144  * Get statistics on all filesystems.
2145  */
2146 int
2147 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2148 {
2149         struct __mac_getfsstat_args muap;
2150
2151         muap.buf = uap->buf;
2152         muap.bufsize = uap->bufsize;
2153         muap.mac = USER_ADDR_NULL;
2154         muap.macsize = 0;
2155         muap.flags = uap->flags;
2156
2157         return (__mac_getfsstat(p, &muap, retval));
2158 }
2159
2160 /*
2161  * __mac_getfsstat: Get MAC-related file system statistics
2162  *
2163  * Parameters:    p                        (ignored)
2164  *                uap                      User argument descriptor (see below)
2165  *                retval                   Count of file system statistics (N stats)
2166  *
2167  * Indirect:      uap->bufsize             Buffer size
2168  *                uap->macsize             MAC info size
2169  *                uap->buf                 Buffer where information will be returned
2170  *                uap->mac                 MAC info
2171  *                uap->flags               File system flags
2172  *
2173  *
2174  * Returns:        0                       Success
2175  *                !0                       Not success
2176  *
2177  */
2178 int
2179 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2180 {
2181         user_addr_t sfsp;
2182         user_addr_t *mp;
2183         size_t count, maxcount, bufsize, macsize;
2184         struct getfsstat_struct fst;
2185
2186         bufsize = (size_t) uap->bufsize;
2187         macsize = (size_t) uap->macsize;
2188
2189         if (IS_64BIT_PROCESS(p)) {
2190                 maxcount = bufsize / sizeof(struct user64_statfs);
2191         }
2192         else {
2193                 maxcount = bufsize / sizeof(struct user32_statfs);
2194         }
2195         sfsp = uap->buf;
2196         count = 0;
2197
2198         mp = NULL;
2199
2200 #if CONFIG_MACF
2201         if (uap->mac != USER_ADDR_NULL) {
2202                 u_int32_t *mp0;
2203                 int error;
2204                 unsigned int i;
2205
2206                 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2207                 if (count != maxcount)
2208                         return (EINVAL);
2209
2210                 /* Copy in the array */
2211                 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2212                 if (mp0 == NULL) {
2213                         return (ENOMEM);
2214                 }
2215
2216                 error = copyin(uap->mac, mp0, macsize);
2217                 if (error) {
2218                         FREE(mp0, M_MACTEMP);
2219                         return (error);
2220                 }
2221
2222                 /* Normalize to an array of user_addr_t */
2223                 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2224                 if (mp == NULL) {
2225                         FREE(mp0, M_MACTEMP);
2226                         return (ENOMEM);
2227                 }
2228
2229                 for (i = 0; i < count; i++) {
2230                         if (IS_64BIT_PROCESS(p))
2231                                 mp[i] = ((user_addr_t *)mp0)[i];
2232                         else
2233                                 mp[i] = (user_addr_t)mp0[i];
2234                 }
2235                 FREE(mp0, M_MACTEMP);
2236         }
2237 #endif
2238
2239
2240         fst.sfsp = sfsp;
2241         fst.mp = mp;
2242         fst.flags = uap->flags;
2243         fst.count = 0;
2244         fst.error = 0;
2245         fst.maxcount = maxcount;
2246
2247
2248         vfs_iterate(0, getfsstat_callback, &fst);
2249
2250         if (mp)
2251                 FREE(mp, M_MACTEMP);
2252
2253         if (fst.error ) {
2254                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2255                 return(fst.error);
2256         }
2257
2258         if (fst.sfsp && fst.count > fst.maxcount)
2259                 *retval = fst.maxcount;
2260         else
2261                 *retval = fst.count;
2262         return (0);
2263 }
2264
2265 static int
2266 getfsstat64_callback(mount_t mp, void * arg)
2267 {
2268         struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2269         struct vfsstatfs *sp;
2270         int error;
2271
2272         if (fstp->sfsp && fstp->count < fstp->maxcount) {
2273                 sp = &mp->mnt_vfsstat;
2274                 /*
2275                  * If MNT_NOWAIT is specified, do not refresh the fsstat
2276                  * cache. MNT_WAIT overrides MNT_NOWAIT.
2277                  *
2278                  * We treat MNT_DWAIT as MNT_WAIT for all instances of
2279                  * getfsstat, since the constants are out of the same
2280                  * namespace.
2281                  */
2282                 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2283                      (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2284                     (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2285                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2286                         return(VFS_RETURNED);
2287                 }
2288
2289                 error = statfs64_common(mp, sp, fstp->sfsp);
2290                 if (error) {
2291                         fstp->error = error;
2292                         return(VFS_RETURNED_DONE);
2293                 }
2294                 fstp->sfsp += sizeof(struct statfs64);
2295         }
2296         fstp->count++;
2297         return(VFS_RETURNED);
2298 }
2299
2300 /*
2301  * Get statistics on all file systems in 64 bit mode.
2302  */
2303 int
2304 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2305 {
2306         user_addr_t sfsp;
2307         int count, maxcount;
2308         struct getfsstat_struct fst;
2309
2310         maxcount = uap->bufsize / sizeof(struct statfs64);
2311
2312         sfsp = uap->buf;
2313         count = 0;
2314
2315         fst.sfsp = sfsp;
2316         fst.flags = uap->flags;
2317         fst.count = 0;
2318         fst.error = 0;
2319         fst.maxcount = maxcount;
2320
2321         vfs_iterate(0, getfsstat64_callback, &fst);
2322
2323         if (fst.error ) {
2324                 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2325                 return(fst.error);
2326         }
2327
2328         if (fst.sfsp && fst.count > fst.maxcount)
2329                 *retval = fst.maxcount;
2330         else
2331                 *retval = fst.count;
2332
2333         return (0);
2334 }
2335
2336 /*
2337  * Change current working directory to a given file descriptor.
2338  */
2339 /* ARGSUSED */
2340 static int
2341 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
2342 {
2343         struct filedesc *fdp = p->p_fd;
2344         vnode_t vp;
2345         vnode_t tdp;
2346         vnode_t tvp;
2347         struct mount *mp;
2348         int error;
2349         vfs_context_t ctx = vfs_context_current();
2350
2351         AUDIT_ARG(fd, uap->fd);
2352         if (per_thread && uap->fd == -1) {
2353                 /*
2354                  * Switching back from per-thread to per process CWD; verify we
2355                  * in fact have one before proceeding.  The only success case
2356                  * for this code path is to return 0 preemptively after zapping
2357                  * the thread structure contents.
2358                  */
2359                 thread_t th = vfs_context_thread(ctx);
2360                 if (th) {
2361                         uthread_t uth = get_bsdthread_info(th);
2362                         tvp = uth->uu_cdir;
2363                         uth->uu_cdir = NULLVP;
2364                         if (tvp != NULLVP) {
2365                                 vnode_rele(tvp);
2366                                 return (0);
2367                         }
2368                 }
2369                 return (EBADF);
2370         }
2371
2372         if ( (error = file_vnode(uap->fd, &vp)) )
2373                 return(error);
2374         if ( (error = vnode_getwithref(vp)) ) {
2375                 file_drop(uap->fd);
2376                 return(error);
2377         }
2378
2379         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2380
2381         if (vp->v_type != VDIR) {
2382                 error = ENOTDIR;
2383                 goto out;
2384         }
2385
2386 #if CONFIG_MACF
2387         error = mac_vnode_check_chdir(ctx, vp);
2388         if (error)
2389                 goto out;
2390 #endif
2391         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2392         if (error)
2393                 goto out;
2394
2395         while (!error && (mp = vp->v_mountedhere) != NULL) {
2396                 if (vfs_busy(mp, LK_NOWAIT)) {
2397                         error = EACCES;
2398                         goto out;
2399                 }
2400                 error = VFS_ROOT(mp, &tdp, ctx);
2401                 vfs_unbusy(mp);
2402                 if (error)
2403                         break;
2404                 vnode_put(vp);
2405                 vp = tdp;
2406         }
2407         if (error)
2408                 goto out;
2409         if ( (error = vnode_ref(vp)) )
2410                 goto out;
2411         vnode_put(vp);
2412
2413         if (per_thread) {
2414                 thread_t th = vfs_context_thread(ctx);
2415                 if (th) {
2416                         uthread_t uth = get_bsdthread_info(th);
2417                         tvp = uth->uu_cdir;
2418                         uth->uu_cdir = vp;
2419                         OSBitOrAtomic(P_THCWD, &p->p_flag);
2420                 } else {
2421                         vnode_rele(vp);
2422                         return (ENOENT);
2423                 }
2424         } else {
2425                 proc_fdlock(p);
2426                 tvp = fdp->fd_cdir;
2427                 fdp->fd_cdir = vp;
2428                 proc_fdunlock(p);
2429         }
2430
2431         if (tvp)
2432                 vnode_rele(tvp);
2433         file_drop(uap->fd);
2434
2435         return (0);
2436 out:
2437         vnode_put(vp);
2438         file_drop(uap->fd);
2439
2440         return(error);
2441 }
2442
2443 int
2444 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2445 {
2446         return common_fchdir(p, uap, 0);
2447 }
2448
2449 int
2450 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2451 {
2452         return common_fchdir(p, (void *)uap, 1);
2453 }
2454
2455 /*
2456  * Change current working directory (".").
2457  *
2458  * Returns:     0                       Success
2459  *      change_dir:ENOTDIR
2460  *      change_dir:???
2461  *      vnode_ref:ENOENT                No such file or directory
2462  */
2463 /* ARGSUSED */
2464 static int
2465 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
2466 {
2467         struct filedesc *fdp = p->p_fd;
2468         int error;
2469         struct nameidata nd;
2470         vnode_t tvp;
2471         vfs_context_t ctx = vfs_context_current();
2472
2473         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2474                 UIO_USERSPACE, uap->path, ctx);
2475         error = change_dir(&nd, ctx);
2476         if (error)
2477                 return (error);
2478         if ( (error = vnode_ref(nd.ni_vp)) ) {
2479                 vnode_put(nd.ni_vp);
2480                 return (error);
2481         }
2482         /*
2483          * drop the iocount we picked up in change_dir
2484          */
2485         vnode_put(nd.ni_vp);
2486
2487         if (per_thread) {
2488                 thread_t th = vfs_context_thread(ctx);
2489                 if (th) {
2490                         uthread_t uth = get_bsdthread_info(th);
2491                         tvp = uth->uu_cdir;
2492                         uth->uu_cdir = nd.ni_vp;
2493                         OSBitOrAtomic(P_THCWD, &p->p_flag);
2494                 } else {
2495                         vnode_rele(nd.ni_vp);
2496                         return (ENOENT);
2497                 }
2498         } else {
2499                 proc_fdlock(p);
2500                 tvp = fdp->fd_cdir;
2501                 fdp->fd_cdir = nd.ni_vp;
2502                 proc_fdunlock(p);
2503         }
2504
2505         if (tvp)
2506                 vnode_rele(tvp);
2507
2508         return (0);
2509 }
2510
2511
2512 /*
2513  * chdir
2514  *
2515  * Change current working directory (".") for the entire process
2516  *
2517  * Parameters:  p       Process requesting the call
2518  *              uap     User argument descriptor (see below)
2519  *              retval  (ignored)
2520  *
2521  * Indirect parameters: uap->path       Directory path
2522  *
2523  * Returns:     0                       Success
2524  *              common_chdir: ENOTDIR
2525  *              common_chdir: ENOENT    No such file or directory
2526  *              common_chdir: ???
2527  *
2528  */
2529 int
2530 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2531 {
2532         return common_chdir(p, (void *)uap, 0);
2533 }
2534
2535 /*
2536  * __pthread_chdir
2537  *
2538  * Change current working directory (".") for a single thread
2539  *
2540  * Parameters:  p       Process requesting the call
2541  *              uap     User argument descriptor (see below)
2542  *              retval  (ignored)
2543  *
2544  * Indirect parameters: uap->path       Directory path
2545  *
2546  * Returns:     0                       Success
2547  *              common_chdir: ENOTDIR
2548  *              common_chdir: ENOENT    No such file or directory
2549  *              common_chdir: ???
2550  *
2551  */
2552 int
2553 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2554 {
2555         return common_chdir(p, (void *)uap, 1);
2556 }
2557
2558
2559 /*
2560  * Change notion of root (``/'') directory.
2561  */
2562 /* ARGSUSED */
2563 int
2564 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
2565 {
2566         struct filedesc *fdp = p->p_fd;
2567         int error;
2568         struct nameidata nd;
2569         vnode_t tvp;
2570         vfs_context_t ctx = vfs_context_current();
2571
2572         if ((error = suser(kauth_cred_get(), &p->p_acflag)))
2573                 return (error);
2574
2575         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2576                 UIO_USERSPACE, uap->path, ctx);
2577         error = change_dir(&nd, ctx);
2578         if (error)
2579                 return (error);
2580
2581 #if CONFIG_MACF
2582         error = mac_vnode_check_chroot(ctx, nd.ni_vp,
2583             &nd.ni_cnd);
2584         if (error) {
2585                 vnode_put(nd.ni_vp);
2586                 return (error);
2587         }
2588 #endif
2589
2590         if ( (error = vnode_ref(nd.ni_vp)) ) {
2591                 vnode_put(nd.ni_vp);
2592                 return (error);
2593         }
2594         vnode_put(nd.ni_vp);
2595
2596         proc_fdlock(p);
2597         tvp = fdp->fd_rdir;
2598         fdp->fd_rdir = nd.ni_vp;
2599         fdp->fd_flags |= FD_CHROOT;
2600         proc_fdunlock(p);
2601
2602         if (tvp != NULL)
2603                 vnode_rele(tvp);
2604
2605         return (0);
2606 }
2607
2608 /*
2609  * Common routine for chroot and chdir.
2610  *
2611  * Returns:     0                       Success
2612  *              ENOTDIR                 Not a directory
2613  *              namei:???               [anything namei can return]
2614  *              vnode_authorize:???     [anything vnode_authorize can return]
2615  */
2616 static int
2617 change_dir(struct nameidata *ndp, vfs_context_t ctx)
2618 {
2619         vnode_t vp;
2620         int error;
2621
2622         if ((error = namei(ndp)))
2623                 return (error);
2624         nameidone(ndp);
2625         vp = ndp->ni_vp;
2626
2627         if (vp->v_type != VDIR) {
2628                 vnode_put(vp);
2629                 return (ENOTDIR);
2630         }
2631
2632 #if CONFIG_MACF
2633         error = mac_vnode_check_chdir(ctx, vp);
2634         if (error) {
2635                 vnode_put(vp);
2636                 return (error);
2637         }
2638 #endif
2639
2640         error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2641         if (error) {
2642                 vnode_put(vp);
2643                 return (error);
2644         }
2645
2646         return (error);
2647 }
2648
2649 /*
2650  * open1: common back end of open_nocancel() and open_extended().  Allocates
2651  * a file descriptor, opens the vnode named by ndp with vn_open_auth(), applies
2652  * any O_EXLOCK/O_SHLOCK/O_TRUNC request and stores the new fd in *retval.
2653  * Returns:     0                       Success
2654  *              EINVAL                  (uflags & O_ACCMODE) == O_ACCMODE
2655  *              EINTR                   ERESTART from the open, remapped
2656  *      falloc:ENFILE
2657  *      falloc:EMFILE
2658  *      falloc:ENOMEM
2659  *      vn_open_auth:???
2660  *      dupfdopen:???
2661  *      VNOP_ADVLOCK:???
2662  *      vnode_setsize:???
2663  * vap supplies the creation attributes; vap->va_mode is read for auditing.
2664  * XXX Need to implement uid, gid
2665  */
2666 int
2667 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, int32_t *retval)
2668 {
2669         proc_t p = vfs_context_proc(ctx);
2670         uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2671         struct filedesc *fdp = p->p_fd;
2672         struct fileproc *fp;
2673         vnode_t vp;
2674         int flags, oflags;
2675         struct fileproc *nfp;
2676         int type, indx, error;
2677         struct flock lf;
2678         int no_controlling_tty = 0;
2679         int deny_controlling_tty = 0;
2680         struct session *sessp = SESSION_NULL;
2681         struct vfs_context context = *vfs_context_current();    /* local copy; vc_ucred is overwritten on the bad: path. NOTE(review): copies vfs_context_current(), not *ctx - confirm they always match */
2682
2683         oflags = uflags;
2684
2685         if ((oflags & O_ACCMODE) == O_ACCMODE)  /* every access-mode bit set: rejected */
2686                 return(EINVAL);
2687         flags = FFLAGS(uflags);
2688
2689         AUDIT_ARG(fflags, oflags);
2690         AUDIT_ARG(mode, vap->va_mode);
2691
2692         if ( (error = falloc(p, &nfp, &indx, ctx)) ) {
2693                 return (error);
2694         }
2695         fp = nfp;
2696         uu->uu_dupfd = -indx - 1;       /* negative marker; a value >= 0 after a failed open triggers dupfdopen() below */
2697
2698         if (!(p->p_flag & P_CONTROLT)) {        /* process has no controlling tty at this point */
2699                 sessp = proc_session(p);        /* released with session_rele() on every exit path below */
2700                 no_controlling_tty = 1;
2701                 /*
2702                  * If conditions would warrant getting a controlling tty if
2703                  * the device being opened is a tty (see ttyopen in tty.c),
2704                  * but the open flags deny it, set a flag in the session to
2705                  * prevent it.
2706                  */
2707                 if (SESS_LEADER(p, sessp) &&
2708                     sessp->s_ttyvp == NULL &&
2709                     (flags & O_NOCTTY)) {
2710                         session_lock(sessp);
2711                         sessp->s_flags |= S_NOCTTY;
2712                         session_unlock(sessp);
2713                         deny_controlling_tty = 1;       /* remember to clear S_NOCTTY again before returning */
2714                 }
2715         }
2716
2717         if ((error = vn_open_auth(ndp, &flags, vap))) {
2718                 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){        /* XXX from fdopen */
2719                         if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) {
2720                                 fp_drop(p, indx, NULL, 0);
2721                                 *retval = indx; /* the duplicated descriptor is the result of the open */
2722                                 if (deny_controlling_tty) {
2723                                         session_lock(sessp);
2724                                         sessp->s_flags &= ~S_NOCTTY;
2725                                         session_unlock(sessp);
2726                                 }
2727                                 if (sessp != SESSION_NULL)
2728                                         session_rele(sessp);
2729                                 return (0);
2730                         }
2731                 }
2732                 if (error == ERESTART)  /* ERESTART is reported to the caller as EINTR */
2733                         error = EINTR;
2734                 fp_free(p, indx, fp);   /* give back what falloc() handed out */
2735
2736                 if (deny_controlling_tty) {
2737                         session_lock(sessp);
2738                         sessp->s_flags &= ~S_NOCTTY;
2739                         session_unlock(sessp);
2740                 }
2741                 if (sessp != SESSION_NULL)
2742                         session_rele(sessp);
2743                 return (error);
2744         }
2745         uu->uu_dupfd = 0;       /* open succeeded: clear the marker set above */
2746         vp = ndp->ni_vp;
2747
2748         fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);    /* only FMASK and O_EVTONLY bits are kept */
2749         fp->f_fglob->fg_type = DTYPE_VNODE;
2750         fp->f_fglob->fg_ops = &vnops;
2751         fp->f_fglob->fg_data = (caddr_t)vp;
2752
2753         if (flags & (O_EXLOCK | O_SHLOCK)) {    /* caller asked for a whole-file lock at open time */
2754                 lf.l_whence = SEEK_SET;
2755                 lf.l_start = 0;
2756                 lf.l_len = 0;
2757                 if (flags & O_EXLOCK)
2758                         lf.l_type = F_WRLCK;
2759                 else
2760                         lf.l_type = F_RDLCK;
2761                 type = F_FLOCK;
2762                 if ((flags & FNONBLOCK) == 0)   /* wait for the lock unless FNONBLOCK was requested */
2763                         type |= F_WAIT;
2764 #if CONFIG_MACF
2765                 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
2766                     F_SETLK, &lf);
2767                 if (error)
2768                         goto bad;
2769 #endif
2770                 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx)))
2771                         goto bad;
2772                 fp->f_fglob->fg_flag |= FHASLOCK;
2773         }
2774
2775         /* try to truncate by setting the size attribute */
2776         if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
2777                 goto bad;
2778
2779         /*
2780          * If the open flags denied the acquisition of a controlling tty,
2781          * clear the flag in the session structure that prevented the lower
2782          * level code from assigning one.
2783          */
2784         if (deny_controlling_tty) {
2785                 session_lock(sessp);
2786                 sessp->s_flags &= ~S_NOCTTY;
2787                 session_unlock(sessp);
2788         }
2789
2790         /*
2791          * If a controlling tty was set by the tty line discipline, then we
2792          * want to set the vp of the tty into the session structure.  We have
2793          * a race here because we can't get to the vp for the tp in ttyopen,
2794          * because it's not passed as a parameter in the open path.
2795          */
2796         if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
2797                 vnode_t ttyvp;
2798                 vnode_ref(vp);  /* NOTE(review): return value is ignored - confirm vnode_ref() cannot fail here */
2799                 session_lock(sessp);
2800                 ttyvp = sessp->s_ttyvp;
2801                 sessp->s_ttyvp = vp;
2802                 sessp->s_ttyvid = vnode_vid(vp);
2803                 session_unlock(sessp);
2804                 if (ttyvp != NULLVP)    /* release the tty vnode the session recorded before, if any */
2805                         vnode_rele(ttyvp);
2806         }
2807
2808         vnode_put(vp);  /* presumably drops the iocount returned by vn_open_auth() - confirm */
2809
2810         proc_fdlock(p);
2811         procfdtbl_releasefd(p, indx, NULL);
2812         fp_drop(p, indx, fp, 1);
2813         proc_fdunlock(p);
2814
2815         *retval = indx;
2816
2817         if (sessp != SESSION_NULL)
2818                 session_rele(sessp);
2819         return (0);
2820 bad:   /* the vnode is open but a later step failed: undo S_NOCTTY, close the vnode, free the descriptor */
2821         if (deny_controlling_tty) {
2822                 session_lock(sessp);
2823                 sessp->s_flags &= ~S_NOCTTY;
2824                 session_unlock(sessp);
2825         }
2826         if (sessp != SESSION_NULL)
2827                 session_rele(sessp);
2828
2829         /* Modify local copy (to not damage thread copy) */
2830         context.vc_ucred = fp->f_fglob->fg_cred;
2831
2832         vn_close(vp, fp->f_fglob->fg_flag, &context);  /* closes using the credential stored in the fileglob */
2833         vnode_put(vp);
2834         fp_free(p, indx, fp);
2835
2836         return (error);
2837
2838 }
2839
2840 /*
2841  * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
2842  *
2843  * Parameters:  p                       Process requesting the open
2844  *              uap                     User argument descriptor (see below)
2845  *              retval                  Pointer to an area to receive the
2846  *                                      return calue from the system call
2847  *
2848  * Indirect:    uap->path               Path to open (same as 'open')
2849  *              uap->flags              Flags to open (same as 'open'
2850  *              uap->uid                UID to set, if creating
2851  *              uap->gid                GID to set, if creating
2852  *              uap->mode               File mode, if creating (same as 'open')
2853  *              uap->xsecurity          ACL to set, if creating
2854  *
2855  * Returns:     0                       Success
2856  *              !0                      errno value
2857  *
2858  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
2859  *
2860  * XXX:         We should enummerate the possible errno values here, and where
2861  *              in the code they originated.
2862  */
2863 int
2864 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
2865 {
2866         struct filedesc *fdp = p->p_fd;
2867         int ciferror;
2868         kauth_filesec_t xsecdst;
2869         struct vnode_attr va;
2870         struct nameidata nd;
2871         int cmode;
2872
2873         AUDIT_ARG(owner, uap->uid, uap->gid);
2874
2875         xsecdst = NULL;
2876         if ((uap->xsecurity != USER_ADDR_NULL) &&
2877             ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
2878                 return ciferror;
2879
2880         VATTR_INIT(&va);
2881         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2882         VATTR_SET(&va, va_mode, cmode);
2883         if (uap->uid != KAUTH_UID_NONE)
2884                 VATTR_SET(&va, va_uid, uap->uid);
2885         if (uap->gid != KAUTH_GID_NONE)
2886                 VATTR_SET(&va, va_gid, uap->gid);
2887         if (xsecdst != NULL)
2888                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2889
2890         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2891
2892         ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
2893         if (xsecdst != NULL)
2894                 kauth_filesec_free(xsecdst);
2895
2896         return ciferror;
2897 }
2898
2899 int
2900 open(proc_t p, struct open_args *uap, int32_t *retval)
2901 {
2902         __pthread_testcancel(1);
2903         return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
2904 }
2905
2906 int
2907 open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
2908 {
2909         struct filedesc *fdp = p->p_fd;
2910         struct vnode_attr va;
2911         struct nameidata nd;
2912         int cmode;
2913
2914         VATTR_INIT(&va);
2915         /* Mask off all but regular access permissions */
2916         cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2917         VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2918
2919         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2920
2921         return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
2922 }
2923
2924
2925 /*
2926  * Create a special file.
2927  */
2928 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
2929
2930 int
2931 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
2932 {
2933         struct vnode_attr va;
2934         vfs_context_t ctx = vfs_context_current();
2935         int error;
2936         int whiteout = 0;
2937         struct nameidata nd;
2938         vnode_t vp, dvp;
2939
2940         VATTR_INIT(&va);
2941         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2942         VATTR_SET(&va, va_rdev, uap->dev);
2943
2944         /* If it's a mknod() of a FIFO, call mkfifo1() instead */
2945         if ((uap->mode & S_IFMT) == S_IFIFO)
2946                 return(mkfifo1(ctx, uap->path, &va));
2947
2948         AUDIT_ARG(mode, uap->mode);
2949         AUDIT_ARG(value32, uap->dev);
2950
2951         if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
2952                 return (error);
2953         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2954                 UIO_USERSPACE, uap->path, ctx);
2955         error = namei(&nd);
2956         if (error)
2957                 return (error);
2958         dvp = nd.ni_dvp;
2959         vp = nd.ni_vp;
2960
2961         if (vp != NULL) {
2962                 error = EEXIST;
2963                 goto out;
2964         }
2965
2966         switch (uap->mode & S_IFMT) {
2967         case S_IFMT:    /* used by badsect to flag bad sectors */
2968                 VATTR_SET(&va, va_type, VBAD);
2969                 break;
2970         case S_IFCHR:
2971                 VATTR_SET(&va, va_type, VCHR);
2972                 break;
2973         case S_IFBLK:
2974                 VATTR_SET(&va, va_type, VBLK);
2975                 break;
2976         case S_IFWHT:
2977                 whiteout = 1;
2978                 break;
2979         default:
2980                 error = EINVAL;
2981                 goto out;
2982         }
2983
2984 #if CONFIG_MACF
2985         if (!whiteout) {
2986                 error = mac_vnode_check_create(ctx,
2987                     nd.ni_dvp, &nd.ni_cnd, &va);
2988                 if (error)
2989                         goto out;
2990         }
2991 #endif
2992
2993         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2994                 goto out;
2995
2996         if (whiteout) {
2997                 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, ctx);
2998         } else {
2999                 error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
3000         }
3001         if (error)
3002                 goto out;
3003
3004         if (vp) {
3005                 int     update_flags = 0;
3006
3007                 // Make sure the name & parent pointers are hooked up
3008                 if (vp->v_name == NULL)
3009                         update_flags |= VNODE_UPDATE_NAME;
3010                 if (vp->v_parent == NULLVP)
3011                         update_flags |= VNODE_UPDATE_PARENT;
3012
3013                 if (update_flags)
3014                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3015
3016 #if CONFIG_FSE
3017                 add_fsevent(FSE_CREATE_FILE, ctx,
3018                     FSE_ARG_VNODE, vp,
3019                     FSE_ARG_DONE);
3020 #endif
3021         }
3022
3023 out:
3024         /*
3025          * nameidone has to happen before we vnode_put(dvp)
3026          * since it may need to release the fs_nodelock on the dvp
3027          */
3028         nameidone(&nd);
3029
3030         if (vp)
3031                 vnode_put(vp);
3032         vnode_put(dvp);
3033
3034         return (error);
3035 }
3036
3037 /*
3038  * Create a named pipe.
3039  *
3040  * Returns:     0                       Success
3041  *              EEXIST
3042  *      namei:???
3043  *      vnode_authorize:???
3044  *      vn_create:???
3045  */
3046 static int
3047 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
3048 {
3049         vnode_t vp, dvp;
3050         int error;
3051         struct nameidata nd;
3052
3053         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
3054                 UIO_USERSPACE, upath, ctx);
3055         error = namei(&nd);
3056         if (error)
3057                 return (error);
3058         dvp = nd.ni_dvp;
3059         vp = nd.ni_vp;
3060
3061         /* check that this is a new file and authorize addition */
3062         if (vp != NULL) {
3063                 error = EEXIST;
3064                 goto out;
3065         }
3066         VATTR_SET(vap, va_type, VFIFO);
3067
3068 #if CONFIG_MACF
3069         error = mac_vnode_check_create(ctx, nd.ni_dvp,
3070             &nd.ni_cnd, vap);
3071         if (error)
3072                 goto out;
3073 #endif
3074
3075
3076         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3077                 goto out;
3078
3079
3080         error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx);
3081 out:
3082         /*
3083          * nameidone has to happen before we vnode_put(dvp)
3084          * since it may need to release the fs_nodelock on the dvp
3085          */
3086         nameidone(&nd);
3087
3088         if (vp)
3089                 vnode_put(vp);
3090         vnode_put(dvp);
3091
3092         return error;
3093 }
3094
3095
3096 /*
3097  * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3098  *
3099  * Parameters:  p                       Process requesting the open
3100  *              uap                     User argument descriptor (see below)
3101  *              retval                  (Ignored)
3102  *
3103  * Indirect:    uap->path               Path to fifo (same as 'mkfifo')
3104  *              uap->uid                UID to set
3105  *              uap->gid                GID to set
3106  *              uap->mode               File mode to set (same as 'mkfifo')
3107  *              uap->xsecurity          ACL to set, if creating
3108  *
3109  * Returns:     0                       Success
3110  *              !0                      errno value
3111  *
3112  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
3113  *
3114  * XXX:         We should enummerate the possible errno values here, and where
3115  *              in the code they originated.
3116  */
3117 int
3118 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3119 {
3120         int ciferror;
3121         kauth_filesec_t xsecdst;
3122         struct vnode_attr va;
3123
3124         AUDIT_ARG(owner, uap->uid, uap->gid);
3125
3126         xsecdst = KAUTH_FILESEC_NONE;
3127         if (uap->xsecurity != USER_ADDR_NULL) {
3128                 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3129                         return ciferror;
3130         }
3131
3132         VATTR_INIT(&va);
3133         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3134         if (uap->uid != KAUTH_UID_NONE)
3135                 VATTR_SET(&va, va_uid, uap->uid);
3136         if (uap->gid != KAUTH_GID_NONE)
3137                 VATTR_SET(&va, va_gid, uap->gid);
3138         if (xsecdst != KAUTH_FILESEC_NONE)
3139                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3140
3141         ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3142
3143         if (xsecdst != KAUTH_FILESEC_NONE)
3144                 kauth_filesec_free(xsecdst);
3145         return ciferror;
3146 }
3147
3148 /* ARGSUSED */
3149 int
3150 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3151 {
3152         struct vnode_attr va;
3153
3154         VATTR_INIT(&va);
3155         VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3156
3157         return(mkfifo1(vfs_context_current(), uap->path, &va));
3158 }
3159
3160
/*
 * my_strrchr: return a pointer to the last occurrence of 'ch' in the
 * NUL-terminated string 'p', or NULL when it does not occur.  The
 * terminator itself takes part in the search, so asking for '\0' yields
 * a pointer to the end of the string.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	/* visit every character, terminator included, then stop */
	do {
		if (*p == ch)
			last = p;
	} while (*p++ != '\0');

	return (last);
}
3174
3175 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3176
3177 int
3178 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
3179 {
3180         int ret, len = _len;
3181
3182         *truncated_path = 0;
3183         ret = vn_getpath(dvp, path, &len);
3184         if (ret == 0 && len < (MAXPATHLEN - 1)) {
3185                 if (leafname) {
3186                         path[len-1] = '/';
3187                         len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
3188                         if (len > MAXPATHLEN) {
3189                                 char *ptr;
3190
3191                                 // the string got truncated!
3192                                 *truncated_path = 1;
3193                                 ptr = my_strrchr(path, '/');
3194                                 if (ptr) {
3195                                         *ptr = '\0';   // chop off the string at the last directory component
3196                                 }
3197                                 len = strlen(path) + 1;
3198                         }
3199                 }
3200         } else if (ret == 0) {
3201                 *truncated_path = 1;
3202         } else if (ret != 0) {
3203                 struct vnode *mydvp=dvp;
3204
3205                 if (ret != ENOSPC) {
3206                         printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
3207                                dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
3208                 }
3209                 *truncated_path = 1;
3210
3211                 do {
3212                         if (mydvp->v_parent != NULL) {
3213                                 mydvp = mydvp->v_parent;
3214                         } else if (mydvp->v_mount) {
3215                                 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
3216                                 break;
3217                         } else {
3218                                 // no parent and no mount point?  only thing is to punt and say "/" changed
3219                                 strlcpy(path, "/", _len);
3220                                 len = 2;
3221                                 mydvp = NULL;
3222                         }
3223
3224                         if (mydvp == NULL) {
3225                                 break;
3226                         }
3227
3228                         len = _len;
3229                         ret = vn_getpath(mydvp, path, &len);
3230                 } while (ret == ENOSPC);
3231         }
3232
3233         return len;
3234 }
3235
3236
3237 /*
3238  * Make a hard file link: uap->link becomes a new name for uap->path.
3239  *
3240  * Returns:     0                       Success
3241  *              EPERM                   Directory source, fs lacks VFC_VFSDIRLINKS
3242  *              EEXIST                  uap->link already exists
3243  *              EXDEV                   Source and target directory on different mounts
3244  *      namei:???
3245  *      vnode_authorize:???
3246  *      VNOP_LINK:???                   (also EACCES and ENOMEM, see the body)
3247  */
3248 /* ARGSUSED */
3249 int
3250 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
3251 {
3252         vnode_t vp, dvp, lvp;  /* vp: object linked to; dvp: directory of the new name; lvp: existing target, if any */
3253         struct nameidata nd;
3254         vfs_context_t ctx = vfs_context_current();
3255         int error;
3256 #if CONFIG_FSE
3257         fse_info finfo;
3258 #endif
3259         int need_event, has_listeners;
3260         char *target_path = NULL;
3261         int truncated=0;
3262
3263         vp = dvp = lvp = NULLVP;
3264
3265         /* look up the object we are linking to */
3266         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
3267                 UIO_USERSPACE, uap->path, ctx);
3268         error = namei(&nd);
3269         if (error)
3270                 return (error);
3271         vp = nd.ni_vp;
3272
3273         nameidone(&nd);
3274
3275         /*
3276          * Normally, linking to directories is not supported.
3277          * However, some file systems may have limited support.
3278          */
3279         if (vp->v_type == VDIR) {
3280                 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
3281                         error = EPERM;   /* POSIX */
3282                         goto out;
3283                 }
3284                 /* Linking to a directory requires ownership. */
3285                 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
3286                         struct vnode_attr dva;
3287
3288                         VATTR_INIT(&dva);
3289                         VATTR_WANTED(&dva, va_uid);
3290                         if (vnode_getattr(vp, &dva, ctx) != 0 ||
3291                             !VATTR_IS_SUPPORTED(&dva, va_uid) ||
3292                             (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
3293                                 error = EACCES; /* not the owner, or the owner could not be determined */
3294                                 goto out;
3295                         }
3296                 }
3297         }
3298
3299         /* lookup the target node, reusing nd from the first lookup */
3300         nd.ni_cnd.cn_nameiop = CREATE;
3301         nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
3302         nd.ni_dirp = uap->link;
3303         error = namei(&nd);
3304         if (error != 0)
3305                 goto out;
3306         dvp = nd.ni_dvp;
3307         lvp = nd.ni_vp;
3308
3309 #if CONFIG_MACF
3310         if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
3311                 goto out2;
3312 #endif
3313
3314         /* or to anything that kauth doesn't want us to (eg. immutable items) */
3315         if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
3316                 goto out2;
3317
3318         /* target node must not exist */
3319         if (lvp != NULLVP) {
3320                 error = EEXIST;
3321                 goto out2;
3322         }
3323         /* cannot link across mountpoints */
3324         if (vnode_mount(vp) != vnode_mount(dvp)) {
3325                 error = EXDEV;
3326                 goto out2;
3327         }
3328
3329         /* authorize creation of the target node */
3330         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3331                 goto out2;
3332
3333         /* and finally make the link */
3334         error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
3335         if (error)
3336                 goto out2;
3337
3338 #if CONFIG_FSE
3339         need_event = need_fsevent(FSE_CREATE_FILE, dvp);
3340 #else
3341         need_event = 0;
3342 #endif
3343         has_listeners = kauth_authorize_fileop_has_listeners();
3344
3345         if (need_event || has_listeners) {      /* path strings are only built when somebody consumes them */
3346                 char *link_to_path = NULL;
3347                 int len, link_name_len;
3348
3349                 /* build the path to the new link file */
3350                 GET_PATH(target_path);
3351                 if (target_path == NULL) {
3352                         error = ENOMEM; /* NOTE(review): VNOP_LINK already succeeded above, yet the call reports ENOMEM */
3353                         goto out2;
3354                 }
3355
3356                 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
3357
3358                 if (has_listeners) {
3359                         /* build the path to file we are linking to */
3360                         GET_PATH(link_to_path);
3361                         if (link_to_path == NULL) {
3362                                 error = ENOMEM; /* NOTE(review): same as above - the link exists but ENOMEM is returned */
3363                                 goto out2;
3364                         }
3365
3366                         link_name_len = MAXPATHLEN;
3367                         vn_getpath(vp, link_to_path, &link_name_len);  /* return value is not checked */
3368
3369                         /*
3370                          * Call out to allow 3rd party notification of link.
3371                          * Ignore result of kauth_authorize_fileop call.
3372                          */
3373                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
3374                                                (uintptr_t)link_to_path, (uintptr_t)target_path);
3375                         if (link_to_path != NULL) {     /* always true here: the NULL case left via out2 above */
3376                                 RELEASE_PATH(link_to_path);
3377                         }
3378                 }
3379 #if CONFIG_FSE
3380                 if (need_event) {
3381                         /* construct fsevent */
3382                         if (get_fse_info(vp, &finfo, ctx) == 0) {
3383                                 if (truncated) {
3384                                         finfo.mode |= FSE_TRUNCATED_PATH;
3385                                 }
3386
3387                                 // build the path to the destination of the link
3388                                 add_fsevent(FSE_CREATE_FILE, ctx,
3389                                             FSE_ARG_STRING, len, target_path,
3390                                             FSE_ARG_FINFO, &finfo,
3391                                             FSE_ARG_DONE);
3392                         }
3393                         if (vp->v_parent) {
3394                             add_fsevent(FSE_STAT_CHANGED, ctx,
3395                                 FSE_ARG_VNODE, vp->v_parent,
3396                                 FSE_ARG_DONE);
3397                         }
3398                 }
3399 #endif
3400         }
3401 out2:  /* exit used once the second namei() has succeeded */
3402         /*
3403          * nameidone has to happen before we vnode_put(dvp)
3404          * since it may need to release the fs_nodelock on the dvp
3405          */
3406         nameidone(&nd);
3407         if (target_path != NULL) {
3408                 RELEASE_PATH(target_path);
3409         }
3410 out:   /* exit used when only the first lookup has been done; lvp and dvp are still NULLVP then */
3411         if (lvp)
3412                 vnode_put(lvp);
3413         if (dvp)
3414                 vnode_put(dvp);
3415         vnode_put(vp);  /* vp was returned by the first namei() */
3416         return (error);
3417 }
3418
3419 /*
3420  * Make a symbolic link.
3421  *
3422  * We could add support for ACLs here too...
      *
      * The link contents (uap->path) are copied in from user space into a
      * MAXPATHLEN buffer; the link itself is created under the name uap->link
      * as a VLNK whose mode is ACCESSPERMS with the p->p_fd->fd_cmask bits
      * cleared.
      *
      * Returns:     0                       Success
      *              EEXIST                  The lookup of uap->link returned an
      *                                      existing vnode
      *              copyinstr:???
      *              namei:???
      *              mac_vnode_check_create:???      (CONFIG_MACF only)
      *              vnode_authorize:???
      *              vnode_authattr_new:???
      *              VNOP_SYMLINK:???
      *              vnode_setattr_fallback:???
3423  */
3424 /* ARGSUSED */
3425 int
3426 symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
3427 {
3428         struct vnode_attr va;
3429         char *path;
3430         int error;
3431         struct nameidata nd;
3432         vfs_context_t ctx = vfs_context_current();
3433         vnode_t vp, dvp;
3434         size_t dummy=0;
3435
             /* Kernel copy of the link contents; released at "out" on every path. */
3436         MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
3437         error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
3438         if (error)
3439                 goto out;
3440         AUDIT_ARG(text, path);  /* This is the link string */
3441
             /* Look up the name of the link to create (uap->link). */
3442         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
3443                 UIO_USERSPACE, uap->link, ctx);
3444         error = namei(&nd);
3445         if (error)
3446                 goto out;
             /* From here on dvp (and vp, if set) are dropped at "skipit". */
3447         dvp = nd.ni_dvp;
3448         vp = nd.ni_vp;
3449
3450         VATTR_INIT(&va);
3451         VATTR_SET(&va, va_type, VLNK);
3452         VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
3453 #if CONFIG_MACF
3454         error = mac_vnode_check_create(ctx,
3455                         dvp, &nd.ni_cnd, &va);
3456 #endif
3457         if (error != 0) {
3458             goto skipit;
3459         }
3460
             /* The lookup found an existing object under that name. */
3461         if (vp != NULL) {
3462             error = EEXIST;
3463             goto skipit;
3464         }
3465
3466         /* authorize */
3467         if (error == 0)
3468                 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
3469         /* get default ownership, etc. */
3470         if (error == 0)
3471                 error = vnode_authattr_new(dvp, &va, 0, ctx);
3472         if (error == 0)
3473                 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
3474
3475         /* do fallback attribute handling */
3476         if (error == 0)
3477                 error = vnode_setattr_fallback(vp, &va, ctx);
3478
3479         if (error == 0) {
3480                 int     update_flags = 0;
3481
                     /*
                      * VNOP_SYMLINK did not hand back a vnode: reuse nd for a
                      * plain LOOKUP (all lookup flags cleared) to obtain one.
                      * If that yields no vnode we leave with whatever error
                      * namei() returned.
                      */
3482                 if (vp == NULL) {
3483                         nd.ni_cnd.cn_nameiop = LOOKUP;
3484                         nd.ni_cnd.cn_flags = 0;
3485                         error = namei(&nd);
3486                         vp = nd.ni_vp;
3487
3488                         if (vp == NULL)
3489                                 goto skipit;
3490                 }
3491
3492 #if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
3493                 /* call out to allow 3rd party notification of rename.
3494                  * Ignore result of kauth_authorize_fileop call.
3495                  */
3496                 if (kauth_authorize_fileop_has_listeners() &&
3497                     namei(&nd) == 0) {
3498                         char *new_link_path = NULL;
3499                         int             len;
3500
3501                         /* build the path to the new link file */
3502                         new_link_path = get_pathbuff();
3503                         len = MAXPATHLEN;
3504                         vn_getpath(dvp, new_link_path, &len);
3505                         if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
3506                                 new_link_path[len - 1] = '/';
3507                                 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
3508                         }
3509
3510                         kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
3511                                            (uintptr_t)path, (uintptr_t)new_link_path);
3512                         if (new_link_path != NULL)
3513                                 release_pathbuff(new_link_path);
3514                 }
3515 #endif
3516                 // Make sure the name & parent pointers are hooked up
3517                 if (vp->v_name == NULL)
3518                         update_flags |= VNODE_UPDATE_NAME;
3519                 if (vp->v_parent == NULLVP)
3520                         update_flags |= VNODE_UPDATE_PARENT;
3521
3522                 if (update_flags)
3523                         vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3524
3525 #if CONFIG_FSE
                     /* Report the new link as a file creation. */
3526                 add_fsevent(FSE_CREATE_FILE, ctx,
3527                             FSE_ARG_VNODE, vp,
3528                             FSE_ARG_DONE);
3529 #endif
3530         }
3531
     /* Common exit once the first namei() has succeeded (success or failure). */
3532 skipit:
3533         /*
3534          * nameidone has to happen before we vnode_put(dvp)
3535          * since it may need to release the fs_nodelock on the dvp
3536          */
3537         nameidone(&nd);
3538
3539         if (vp)
3540                 vnode_put(vp);
3541         vnode_put(dvp);
     /* Exit used when copyinstr() or the first namei() failed: only path to free. */
3542 out:
3543         FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
3544
3545         return (error);
3546 }
3547
3548 /*
3549  * undelete: delete a whiteout from the filesystem.
3550  *
3551  * XXX authorization is not implemented for whiteouts.
3552  */
3553 int
3554 undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
3555 {
3556         vfs_context_t ctx = vfs_context_current();
3557         struct nameidata nd;
3558         vnode_t parent_vp, found_vp;
3559         int error;
3560
3561         NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1,
3562                 UIO_USERSPACE, uap->path, ctx);
3563         if ((error = namei(&nd)) != 0) {
3564                 return (error);
3565         }
3566         parent_vp = nd.ni_dvp;
3567         found_vp = nd.ni_vp;
3568
3569         /* Anything other than a vnode-less whiteout entry yields EEXIST. */
3570         if (found_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
3571                 error = EEXIST;
3572         } else {
3573                 error = VNOP_WHITEOUT(parent_vp, &nd.ni_cnd, DELETE, ctx);
3574         }
3575
3576         /* nameidone first: it may need to release the fs_nodelock on the dvp */
3577         nameidone(&nd);
3578
3579         if (found_vp) {
3580                 vnode_put(found_vp);
3581         }
3582         vnode_put(parent_vp);
3583         return (error);
3584 }
3585
3586
3587 /*
3588  * Delete a name from the filesystem.
      *
      * Common implementation behind unlink() and delete().
      *
      * Parameters:  ctx             Context used for authorization, the
      *                              remove VNOP and notifications
      *              ndp             Caller-initialized nameidata; LOCKPARENT
      *                              (and CN_ALLOWRSRCFORK when NAMEDRSRCFORK
      *                              is configured) is OR'ed into its flags
      *                              here before namei() is called
      *              nodelbusy       Non-zero adds VNODE_REMOVE_NODELETEBUSY
      *                              to the flags passed to VNOP_REMOVE
      *
      * Returns:     0               Success
      *              EPERM           Target is a directory and its filesystem
      *                              does not set VFC_VFSDIRLINKS
      *              EBUSY           Target is the root of a mounted
      *                              filesystem (VROOT)
      *              ENOMEM          No path buffer for the notifications
      *              namei:???
      *              mac_vnode_check_unlink:???      (CONFIG_MACF only)
      *              vnode_authorize:???
      *              vnode_removenamedstream:???     (NAMEDRSRCFORK only)
      *              VNOP_REMOVE:???
3589  */
3590 /* ARGSUSED */
3591 int
3592 unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
3593 {
3594         vnode_t vp, dvp;
3595         int error;
3596         struct componentname *cnp;
3597         char  *path = NULL;
3598         int  len=0;
3599 #if CONFIG_FSE
3600         fse_info  finfo;
3601 #endif
3602         int flags = 0;
3603         int need_event = 0;
3604         int has_listeners = 0;
3605         int truncated_path=0;
3606 #if NAMEDRSRCFORK
3607         /* unlink or delete is allowed on rsrc forks and named streams */
3608         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3609 #endif
3610
3611         ndp->ni_cnd.cn_flags |= LOCKPARENT;
3612         cnp = &ndp->ni_cnd;
3613
3614         error = namei(ndp);
3615         if (error)
3616                 return (error);
3617
             /* Both vnodes are released at "out". */
3618         dvp = ndp->ni_dvp;
3619         vp = ndp->ni_vp;
3620
3621         /* With Carbon delete semantics, busy files cannot be deleted */
3622         if (nodelbusy) {
3623                 flags |= VNODE_REMOVE_NODELETEBUSY;
3624         }
3625
3626         /*
3627          * Normally, unlinking of directories is not supported.
3628          * However, some file systems may have limited support.
3629          */
3630         if ((vp->v_type == VDIR) &&
3631             !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
3632                 error = EPERM;  /* POSIX */
3633         }
3634
3635         /*
3636          * The root of a mounted filesystem cannot be deleted.
3637          */
             /* Assigned last, so EBUSY wins when the EPERM test above also hit. */
3638         if (vp->v_flag & VROOT) {
3639                 error = EBUSY;
3640         }
3641         if (error)
3642                 goto out;
3643
3644
3645         /* authorize the delete operation */
3646 #if CONFIG_MACF
3647         if (!error)
3648                 error = mac_vnode_check_unlink(ctx,
3649                     dvp, vp, cnp);
3650 #endif /* MAC */
3651         if (!error)
3652                 error = vnode_authorize(vp, ndp->ni_dvp, KAUTH_VNODE_DELETE, ctx);
3653         if (error)
3654                 goto out;
3655
3656 #if CONFIG_FSE
             /*
              * For anything but a hardlink the fsevent info is captured now,
              * before the remove; hardlinks are sampled after it (see below).
              */
3657         need_event = need_fsevent(FSE_DELETE, dvp);
3658         if (need_event) {
3659                 if ((vp->v_flag & VISHARDLINK) == 0) {
3660                         get_fse_info(vp, &finfo, ctx);
3661                 }
3662         }
3663 #endif
             /*
              * Build the path before the remove; it is only needed when an
              * fsevent will be posted or a kauth fileop listener is present.
              */
3664         has_listeners = kauth_authorize_fileop_has_listeners();
3665         if (need_event || has_listeners) {
3666                 GET_PATH(path);
3667                 if (path == NULL) {
3668                         error = ENOMEM;
3669                         goto out;
3670                 }
3671
3672                 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
3673         }
3674
             /* A resource-fork request removes the named stream, not the file. */
3675 #if NAMEDRSRCFORK
3676         if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
3677                 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
3678         else
3679 #endif
3680                 error = VNOP_REMOVE(dvp, vp, &ndp->ni_cnd, flags, ctx);
3681
3682         /*
3683          * Call out to allow 3rd party notification of delete.
3684          * Ignore result of kauth_authorize_fileop call.
3685          */
3686         if (!error) {
3687                 if (has_listeners) {
3688                         kauth_authorize_fileop(vfs_context_ucred(ctx),
3689                                 KAUTH_FILEOP_DELETE,
3690                                 (uintptr_t)vp,
3691                                 (uintptr_t)path);
3692                 }
3693
3694                 if (vp->v_flag & VISHARDLINK) {
3695                     //
3696                     // if a hardlink gets deleted we want to blow away the
3697                     // v_parent link because the path that got us to this
3698                     // instance of the link is no longer valid.  this will
3699                     // force the next call to get the path to ask the file
3700                     // system instead of just following the v_parent link.
3701                     //
3702                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
3703                 }
3704
3705 #if CONFIG_FSE
3706                 if (need_event) {
                             /* Hardlinks were skipped before the remove; sample now. */
3707                         if (vp->v_flag & VISHARDLINK) {
3708                                 get_fse_info(vp, &finfo, ctx);
3709                         }
3710                         if (truncated_path) {
3711                                 finfo.mode |= FSE_TRUNCATED_PATH;
3712                         }
3713                         add_fsevent(FSE_DELETE, ctx,
3714                                                 FSE_ARG_STRING, len, path,
3715                                                 FSE_ARG_FINFO, &finfo,
3716                                                 FSE_ARG_DONE);
3717                 }
3718 #endif
3719         }
             /* The path buffer is released whether or not the remove succeeded. */
3720         if (path != NULL)
3721                 RELEASE_PATH(path);
3722
3723         /*
3724          * nameidone has to happen before we vnode_put(dvp)
3725          * since it may need to release the fs_nodelock on the dvp
3726          */
     /* Reached on success and on every failure after namei() succeeded. */
3727 out:
3728 #if NAMEDRSRCFORK
3729         /* recycle the deleted rsrc fork vnode to force a reclaim, which
3730          * will cause its shadow file to go away if necessary.
3731          */
3732          if ((vnode_isnamedstream(ndp->ni_vp)) &&
3733                 (ndp->ni_vp->v_parent != NULLVP) &&
3734                 vnode_isshadow(ndp->ni_vp)) {
3735                         vnode_recycle(ndp->ni_vp);
3736          }
3737 #endif
3738         nameidone(ndp);
3739         vnode_put(dvp);
3740         vnode_put(vp);
3741         return (error);
3742 }
3743
3744 /* unlink: delete a name using POSIX semantics (unlink1() with nodelbusy == 0). */
3745 int
3746 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
3747 {
3748         vfs_context_t ctx = vfs_context_current();
3749         struct nameidata nd;
3750         int error;
3751
3752         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3753         error = unlink1(ctx, &nd, 0);
3754         return (error);
3755 }
3756
3757 /* delete: delete a name using Carbon semantics (unlink1() with nodelbusy == 1). */
3758 int
3759 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
3760 {
3761         vfs_context_t ctx = vfs_context_current();
3762         struct nameidata nd;
3763         int error;
3764
3765         NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3766         error = unlink1(ctx, &nd, 1);
3767         return (error);
3768 }
3769
3770 /*
3771  * lseek: reposition the read/write offset (fg_offset) of an open file.
3772  */
3773 int
3774 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
3775 {
3776         struct vfs_context *ctx;
3777         struct fileproc *fp;
3778         vnode_t vp;
3779         off_t new_off = uap->offset;
3780         off_t eof_off;
3781         int error;
3782
3783         error = fp_getfvp(p, uap->fd, &fp, &vp);
3784         if (error != 0) {
3785                 /* ENOTSUP from fp_getfvp is reported to the caller as ESPIPE */
3786                 return ((error == ENOTSUP) ? ESPIPE : error);
3787         }
3788         if (vnode_isfifo(vp)) {
3789                 error = ESPIPE;
3790                 goto drop;
3791         }
3792
3793         ctx = vfs_context_current();
3794 #if CONFIG_MACF
3795         if (uap->whence == L_INCR && uap->offset == 0) {
3796                 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
3797                     fp->f_fglob);
3798         } else {
3799                 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
3800                     fp->f_fglob);
3801         }
3802         if (error != 0) {
3803                 goto drop;
3804         }
3805 #endif
3806         error = vnode_getwithref(vp);
3807         if (error != 0) {
3808                 goto drop;
3809         }
3810
3811         switch (uap->whence) {
3812         case L_SET:
3813                 break;
3814         case L_INCR:
3815                 new_off += fp->f_fglob->fg_offset;
3816                 break;
3817         case L_XTND:
3818                 error = vnode_size(vp, &eof_off, ctx);
3819                 if (error == 0) {
3820                         new_off += eof_off;
3821                 }
3822                 break;
3823         default:
3824                 error = EINVAL;
3825                 break;
3826         }
3827
3828         if (error != 0) {
3829                 /* bad whence or failed size lookup: nothing to store */
3830         } else if (uap->offset > 0 && new_off < 0) {
3831                 /* Incremented/relative move past max size */
3832                 error = EOVERFLOW;
3833         } else if (new_off < 0 && vp->v_type != VCHR) {
3834                 /*
3835                  * Decremented/relative move before start.  Negative offsets
3836                  * are only allowed on character devices, per POSIX
3837                  * 1003.1-2001 (most likely for writing disk labels).
3838                  */
3839                 error = EINVAL;
3840         } else {
3841                 fp->f_fglob->fg_offset = new_off;
3842                 *retval = fp->f_fglob->fg_offset;
3843         }
3844
3845         /*
3846          * An lseek can affect whether data is "available to read."  The
3847          * NOTE_NONE hint keeps EVFILT_VNODE events from firing.
3848          */
3849         post_event_if_success(vp, error, NOTE_NONE);
3850         (void)vnode_put(vp);
3851 drop:
3852         file_drop(uap->fd);
3853         return (error);
3854 }
3855
3856
3857 /*
3858  * access1: run a single access check against an already looked-up vnode.
3859  *
3860  * Parameters:  vp              Vnode being checked
3861  *              dvp             Parent vnode, handed to vnode_authorize
3862  *              uflags          Plain R_OK/W_OK/X_OK bits or, when any
3863  *                              _ACCESS_EXTENDED_MASK bit is set, kauth
3864  *                              rights shifted left by 8
3865  *              ctx             Context the check is made in
3866  *
3867  * Returns:     0                       Success
3868  *              mac_vnode_check_access:???      (CONFIG_MACF only)
3869  *              vnode_authorize:???
3870  */
3871 static int
3872 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
3873 {
3874         kauth_action_t action = 0;
3875         int error;
3876
3877         if (uflags & _ACCESS_EXTENDED_MASK) {
3878                 /* take advantage of definition of uflags */
3879                 action = uflags >> 8;
3880         } else {
3881                 /* regular access bits: convert for vnode_authorize */
3882                 if (uflags & R_OK) {
3883                         /* aka KAUTH_VNODE_LIST_DIRECTORY */
3884                         action |= KAUTH_VNODE_READ_DATA;
3885                 }
3886                 if (uflags & W_OK) {
3887                         /* might want delete rights for directories too */
3888                         action |= vnode_isdir(vp) ?
3889                             (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY) :
3890                             KAUTH_VNODE_WRITE_DATA;
3891                 }
3892                 if (uflags & X_OK) {
3893                         action |= vnode_isdir(vp) ?
3894                             KAUTH_VNODE_SEARCH : KAUTH_VNODE_EXECUTE;
3895                 }
3896         }
3897
3898 #if CONFIG_MACF
3899         error = mac_vnode_check_access(ctx, vp, uflags);
3900         if (error != 0) {
3901                 return (error);
3902         }
3903 #endif /* MAC */
3904
3905         /* action == 0 means only check for existence */
3906         if (action == 0) {
3907                 return (0);
3908         }
3909
3910         error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
3911         return (error);
3912 }
3913
3914
3915
3916 /*
3917  * access_extended: Check access permissions in bulk.
3918  *
3919  * Description: uap->entries            Pointer to an array of accessx
3920  *                                      descriptor structs, plus one or
3921  *                                      more NULL terminated strings (see
3922  *                                      "Notes" section below).
3923  *              uap->size               Size of the area pointed to by
3924  *                                      uap->entries.
3925  *              uap->results            Pointer to the results array.
3926  *
3927  * Returns:     0                       Success
3928  *              ENOMEM                  Insufficient memory
3929  *              EINVAL                  Invalid arguments
3930  *              namei:EFAULT            Bad address
3931  *              namei:ENAMETOOLONG      Filename too long
3932  *              namei:ENOENT            No such file or directory
3933  *              namei:ELOOP             Too many levels of symbolic links
3934  *              namei:EBADF             Bad file descriptor
3935  *              namei:ENOTDIR           Not a directory
3936  *              namei:???
3937  *              access1:
3938  *
3939  * Implicit returns:
3940  *              uap->results            Array contents modified
3941  *
3942  * Notes:       The uap->entries are structured as an arbitrary length array
3943  *              of accessx descriptors, followed by one or more NULL terminated
3944  *              strings
3945  *
3946  *                      struct accessx_descriptor[0]
3947  *                      ...
3948  *                      struct accessx_descriptor[n]
3949  *                      char name_data[0];
3950  *
3951  *              We determine the entry count by walking the buffer containing
3952  *              the uap->entries argument descriptor.  For each descriptor we
3953  *              see, the valid values for the offset ad_name_offset will be
3954  *              in the byte range:
3955  *
3956  *                      [ uap->entries + sizeof(struct accessx_descriptor) ]
3957  *                                              to
3958  *                              [ uap->entries + uap->size - 2 ]
3959  *
3960  *              since we must have at least one string, and the string must
3961  *              be at least one character plus the NULL terminator in length.
      *
      *              Any non-zero ad_name_offset that is at or beyond uap->size
      *              is rejected with EINVAL, so a name string always starts
      *              inside the copied-in buffer.
3962  *
3963  * XXX:         Need to support the check-as uid argument
3964  */
3965 int
3966 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
3967 {
3968         struct accessx_descriptor *input = NULL;
3969         errno_t *result = NULL;
3970         errno_t error = 0;
3971         int wantdelete = 0;
3972         unsigned int desc_max, desc_actual, i, j;
3973         struct vfs_context context;
3974         struct nameidata nd;
3975         int niopts;
3976         vnode_t vp = NULL;
3977         vnode_t dvp = NULL;
3978 #define ACCESSX_MAX_DESCR_ON_STACK 10
3979         struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
3980
3981         context.vc_ucred = NULL;
3982
3983         /*
3984          * Validate parameters; if valid, copy the descriptor array and string
3985          * arguments into local memory.  Before proceeding, the following
3986          * conditions must have been met:
3987          *
3988          * o    The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
3989          * o    There must be sufficient room in the request for at least one
3990          *      descriptor and a one byte NUL terminated string.
3991          * o    The allocation of local storage must not fail.
3992          */
3993         if (uap->size > ACCESSX_MAX_TABLESIZE)
3994                 return(ENOMEM);
3995         if (uap->size < (sizeof(struct accessx_descriptor) + 2))
3996                 return(EINVAL);
             /* Small requests are served from the stack; larger ones from M_TEMP. */
3997         if (uap->size <= sizeof (stack_input)) {
3998                 input = stack_input;
3999         } else {
4000                 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
4001                 if (input == NULL) {
4002                         error = ENOMEM;
4003                         goto out;
4004                 }
4005         }
4006         error = copyin(uap->entries, input, uap->size);
4007         if (error)
4008                 goto out;
4009
4010         AUDIT_ARG(opaque, input, uap->size);
4011
4012         /*
4013          * Force NUL termination of the copyin buffer to avoid namei() running
4014          * off the end.  If the caller passes us bogus data, they may get a
4015          * bogus result.
4016          */
4017         ((char *)input)[uap->size - 1] = 0;
4018
4019         /*
4020          * Access is defined as checking against the process' real identity,
4021          * even if operations are checking the effective identity.  This
4022          * requires that we use a local vfs context.
4023          */
4024         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4025         context.vc_thread = current_thread();
4026
4027         /*
4028          * Find out how many entries we have, so we can allocate the result
4029          * array by walking the list and adjusting the count downward by the
4030          * earliest string offset we see.
4031          */
4032         desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
4033         desc_actual = desc_max;
4034         for (i = 0; i < desc_actual; i++) {
4035                 /*
4036                  * Take the offset to the name string for this entry and
4037                  * convert to an input array index, which would be one off
4038                  * the end of the array if this entry was the lowest-addressed
4039                  * name string.
4040                  */
4041                 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
4042
4043                 /*
4044                  * An offset greater than the max allowable offset is an error.
4045                  * It is also an error for any valid entry to point
4046                  * to a location prior to the end of the current entry, if
4047                  * it's not a reference to the string of the previous entry.
4048                  */
4049                 if (j > desc_max || (j != 0 && j <= i)) {
4050                         error = EINVAL;
4051                         goto out;
4052                 }
4053
                     /*
                      * The index test above rounds the offset down to a descriptor
                      * index, so it still admits byte offsets at or past the end
                      * of the copied-in buffer.  Reject those as well, so that
                      * namei() is never handed a pointer outside of input[].
                      */
                     if (input[i].ad_name_offset >= uap->size) {
                             error = EINVAL;
                             goto out;
                     }

4054                 /*
4055                  * An offset of 0 means use the previous descriptor's offset;
4056                  * this is used to chain multiple requests for the same file
4057                  * to avoid multiple lookups.
4058                  */
4059                 if (j == 0) {
4060                         /* This is not valid for the first entry */
4061                         if (i == 0) {
4062                                 error = EINVAL;
4063                                 goto out;
4064                         }
4065                         continue;
4066                 }
4067
4068                 /*
4069                  * If the offset of the string for this descriptor is before
4070                  * what we believe is the current actual last descriptor,
4071                  * then we need to adjust our estimate downward; this permits
4072                  * the string table following the last descriptor to be out
4073                  * of order relative to the descriptor list.
4074                  */
4075                 if (j < desc_actual)
4076                         desc_actual = j;
4077         }
4078
4079         /*
4080          * We limit the actual number of descriptors we are willing to process
4081          * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
4082          * requested exceeds this limit, the whole request fails with ENOMEM.
4083          */
4084         if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
4085                 error = ENOMEM;
4086                 goto out;
4087         }
4088         MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
4089         if (result == NULL) {
4090                 error = ENOMEM;
4091                 goto out;
4092         }
4093
4094         /*
4095          * Do the work by iterating over the descriptor entries we know to
4096          * at least appear to contain valid data.
4097          */
4098         error = 0;
4099         for (i = 0; i < desc_actual; i++) {
4100                 /*
4101                  * If the ad_name_offset is 0, then we use the previous
4102                  * results to make the check; otherwise, we are looking up
4103                  * a new file name.
4104                  */
4105                 if (input[i].ad_name_offset != 0) {
4106                         /* discard old vnodes */
4107                         if (vp) {
4108                                 vnode_put(vp);
4109                                 vp = NULL;
4110                         }
4111                         if (dvp) {
4112                                 vnode_put(dvp);
4113                                 dvp = NULL;
4114                         }
4115
4116                         /*
4117                          * Scan forward in the descriptor list to see if we
4118                          * need the parent vnode.  We will need it if we are
4119                          * deleting, since we must have rights  to remove
4120                          * entries in the parent directory, as well as the
4121                          * rights to delete the object itself.
4122                          */
4123                         wantdelete = input[i].ad_flags & _DELETE_OK;
4124                         for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
4125                                 if (input[j].ad_flags & _DELETE_OK)
4126                                         wantdelete = 1;
4127
4128                         niopts = FOLLOW | AUDITVNPATH1;
4129
4130                         /* need parent for vnode_authorize for deletion test */
4131                         if (wantdelete)
4132                                 niopts |= WANTPARENT;
4133
4134                         /* do the lookup */
4135                         NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset), &context);
4136                         error = namei(&nd);
4137                         if (!error) {
4138                                 vp = nd.ni_vp;
4139                                 if (wantdelete)
4140                                         dvp = nd.ni_dvp;
4141                         }
4142                         nameidone(&nd);
4143                 }
4144
4145                 /*
4146                  * Handle lookup errors.
                      *
                      * A chained entry (ad_name_offset == 0) sees the error left
                      * behind by the most recent lookup, since error is only
                      * reassigned when a new name is looked up.
4147                  */
4148                 switch(error) {
4149                 case ENOENT:
4150                 case EACCES:
4151                 case EPERM:
4152                 case ENOTDIR:
4153                         result[i] = error;
4154                         break;
4155                 case 0:
4156                         /* run this access check */
4157                         result[i] = access1(vp, dvp, input[i].ad_flags, &context);
4158                         break;
4159                 default:
4160                         /* fatal lookup error */
4161
4162                         goto out;
4163                 }
4164         }
4165
4166         AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
4167
4168         /* copy out results */
4169         error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
4170
     /* Common exit: release whatever was acquired before the jump. */
4171 out:
4172         if (input && input != stack_input)
4173                 FREE(input, M_TEMP);
4174         if (result)
4175                 FREE(result, M_TEMP);
4176         if (vp)
4177                 vnode_put(vp);
4178         if (dvp)
4179                 vnode_put(dvp);
4180         if (IS_VALID_CRED(context.vc_ucred))
4181                 kauth_cred_unref(&context.vc_ucred);
4182         return(error);
4183 }
4184
4185
4186 /*
4187  * access: check whether the caller's real identity may access a path.
4188  *
4189  * Returns:     0                       Success
4190  *              namei:EFAULT            Bad address
4191  *              namei:ENAMETOOLONG      Filename too long
4192  *              namei:ENOENT            No such file or directory
4193  *              namei:ELOOP             Too many levels of symbolic links
4194  *              namei:EBADF             Bad file descriptor
4195  *              namei:ENOTDIR           Not a directory
4196  *              namei:???
4197  *              access1:
4198  */
4199 int
4200 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
4201 {
4202         struct vfs_context context;
4203         struct nameidata nd;
4204         int lookup_opts = FOLLOW | AUDITVNPATH1;
4205         int error;
4206 #if NAMEDRSRCFORK
4207         int holds_shadow_ref = 0;
4208 #endif
4209
4210         /*
4211          * The check is defined against the process' real identity even
4212          * though other operations use the effective one, so run it in a
4213          * private context built around a real-identity credential.
4214          */
4215         context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4216         context.vc_thread = current_thread();
4217
4218         /* vnode_authorize needs the parent to evaluate a deletion test */
4219         if (uap->flags & _DELETE_OK) {
4220                 lookup_opts |= WANTPARENT;
4221         }
4222         NDINIT(&nd, LOOKUP, lookup_opts, UIO_USERSPACE, uap->path, &context);
4223 #if NAMEDRSRCFORK
4224         /* access(F_OK) calls are allowed for resource forks. */
4225         if (uap->flags == F_OK) {
4226                 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4227         }
4228 #endif
4229
4230         error = namei(&nd);
4231         if (error == 0) {
4232 #if NAMEDRSRCFORK
4233                 /*
4234                  * Grab reference on the shadow stream file vnode to force
4235                  * an inactive on release which will mark it for recycle.
4236                  */
4237                 if (vnode_isnamedstream(nd.ni_vp) &&
4238                     (nd.ni_vp->v_parent != NULLVP) &&
4239                     vnode_isshadow(nd.ni_vp)) {
4240                         vnode_ref(nd.ni_vp);
4241                         holds_shadow_ref = 1;
4242                 }
4243 #endif
4244
4245                 error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
4246 #if NAMEDRSRCFORK
4247                 if (holds_shadow_ref) {
4248                         vnode_rele(nd.ni_vp);
4249                 }
4250 #endif
4251                 vnode_put(nd.ni_vp);
4252                 if (uap->flags & _DELETE_OK) {
4253                         vnode_put(nd.ni_dvp);
4254                 }
4255                 nameidone(&nd);
4256         }
4257
4258         /* the real-identity credential is released on every path */
4259         kauth_cred_unref(&context.vc_ucred);
4260         return (error);
4261 }
4262
4263
4264 /*
4265  * Returns:     0                       Success
4266  *              EFAULT
4267  *      copyout:EFAULT
4268  *      namei:???
4269  *      vn_stat:???
4270  */
4271 static int
4272 stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4273 {
4274         union {
4275                 struct stat sb;
4276                 struct stat64 sb64;
4277         } source;
4278         union {
4279                 struct user64_stat user64_sb;
4280                 struct user32_stat user32_sb;
4281                 struct user64_stat64 user64_sb64;
4282                 struct user32_stat64 user32_sb64;
4283         } dest;
4284         caddr_t sbp;
4285         int error, my_size;
4286         kauth_filesec_t fsec;
4287         size_t xsecurity_bufsize;
4288         void * statptr;
4289
4290 #if NAMEDRSRCFORK
4291         int is_namedstream = 0;
4292         /* stat calls are allowed for resource forks. */
4293         ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4294 #endif
4295         error = namei(ndp);
4296         if (error)
4297                 return (error);
4298         fsec = KAUTH_FILESEC_NONE;
4299
4300         statptr = (void *)&source;
4301
4302 #if NAMEDRSRCFORK
4303         /* Grab reference on the shadow stream file vnode to
4304          * force an inactive on release which will mark it
4305          * for recycle.
4306          */
4307         if (vnode_isnamedstream(ndp->ni_vp) &&
4308             (ndp->ni_vp->v_parent != NULLVP) &&
4309             vnode_isshadow(ndp->ni_vp)) {
4310                 is_namedstream = 1;
4311                 vnode_ref(ndp->ni_vp);
4312         }
4313 #endif
4314
4315         error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
4316
4317 #if NAMEDRSRCFORK
4318         if (is_namedstream) {
4319                 vnode_rele(ndp->ni_vp);
4320         }
4321 #endif
4322         vnode_put(ndp->ni_vp);
4323         nameidone(ndp);
4324
4325         if (error)
4326                 return (error);
4327         /* Zap spare fields */
4328         if (isstat64 != 0) {
4329                 source.sb64.st_lspare = 0;
4330                 source.sb64.st_qspare[0] = 0LL;
4331                 source.sb64.st_qspare[1] = 0LL;
4332                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4333                         munge_user64_stat64(&source.sb64, &dest.user64_sb64);
4334                         my_size = sizeof(dest.user64_sb64);
4335                         sbp = (caddr_t)&dest.user64_sb64;
4336                 } else {
4337                         munge_user32_stat64(&source.sb64, &dest.user32_sb64);
4338                         my_size = sizeof(dest.user32_sb64);
4339                         sbp = (caddr_t)&dest.user32_sb64;
4340                 }
4341                 /*
4342                  * Check if we raced (post lookup) against the last unlink of a file.
4343                  */
4344                 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
4345                         source.sb64.st_nlink = 1;
4346                 }
4347         } else {
4348                 source.sb.st_lspare = 0;
4349                 source.sb.st_qspare[0] = 0LL;
4350                 source.sb.st_qspare[1] = 0LL;
4351                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4352                         munge_user64_stat(&source.sb, &dest.user64_sb);
4353                         my_size = sizeof(dest.user64_sb);
4354                         sbp = (caddr_t)&dest.user64_sb;
4355                 } else {
4356                         munge_user32_stat(&source.sb, &dest.user32_sb);
4357                         my_size = sizeof(dest.user32_sb);
4358                         sbp = (caddr_t)&dest.user32_sb;
4359                 }
4360
4361                 /*
4362                  * Check if we raced (post lookup) against the last unlink of a file.
4363                  */
4364                 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
4365                         source.sb.st_nlink = 1;
4366                 }
4367         }
4368         if ((error = copyout(sbp, ub, my_size)) != 0)
4369                 goto out;
4370
4371         /* caller wants extended security information? */
4372         if (xsecurity != USER_ADDR_NULL) {
4373
4374                 /* did we get any? */
4375                 if (fsec == KAUTH_FILESEC_NONE) {
4376                         if (susize(xsecurity_size, 0) != 0) {
4377                                 error = EFAULT;
4378                                 goto out;
4379                         }
4380                 } else {
4381                         /* find the user buffer size */
4382                         xsecurity_bufsize = fusize(xsecurity_size);
4383
4384                         /* copy out the actual data size */
4385                         if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
4386                                 error = EFAULT;
4387                                 goto out;
4388                         }
4389
4390                         /* if the caller supplied enough room, copy out to it */
4391                         if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
4392                                 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
4393                 }
4394         }
4395 out:
4396         if (fsec != KAUTH_FILESEC_NONE)
4397                 kauth_filesec_free(fsec);
4398         return (error);
4399 }
4400
4401 /*
4402  * Get file status; this version follows links.
4403  *
4404  * Returns:     0                       Success
4405  *      stat2:???                       [see stat2() in this file]
4406  */
4407 static int
4408 stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4409 {
4410         struct nameidata nd;
4411         vfs_context_t ctx = vfs_context_current();
4412
4413         NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
4414             UIO_USERSPACE, path, ctx);
4415         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4416 }
4417
4418 /*
4419  * stat_extended: Get file status; with extended security (ACL).
4420  *
4421  * Parameters:    p                       (ignored)
4422  *                uap                     User argument descriptor (see below)
4423  *                retval                  (ignored)
4424  *
4425  * Indirect:      uap->path               Path of file to get status from
4426  *                uap->ub                 User buffer (holds file status info)
4427  *                uap->xsecurity          ACL to get (extended security)
4428  *                uap->xsecurity_size     Size of ACL
4429  *
4430  * Returns:        0                      Success
4431  *                !0                      errno value
4432  *
4433  */
4434 int
4435 stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval)
4436 {
4437         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4438 }
4439
4440 /*
4441  * Returns:     0                       Success
4442  *      stat1:???                       [see stat1() in this file]
4443  */
4444 int
4445 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
4446 {
4447         return(stat1(uap->path, uap->ub, 0, 0, 0));
4448 }
4449
4450 int
4451 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
4452 {
4453         return(stat1(uap->path, uap->ub, 0, 0, 1));
4454 }
4455
4456 /*
4457  * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
4458  *
4459  * Parameters:    p                       (ignored)
4460  *                uap                     User argument descriptor (see below)
4461  *                retval                  (ignored)
4462  *
4463  * Indirect:      uap->path               Path of file to get status from
4464  *                uap->ub                 User buffer (holds file status info)
4465  *                uap->xsecurity          ACL to get (extended security)
4466  *                uap->xsecurity_size     Size of ACL
4467  *
4468  * Returns:        0                      Success
4469  *                !0                      errno value
4470  *
4471  */
4472 int
4473 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
4474 {
4475         return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4476 }
4477 /*
4478  * Get file status; this version does not follow links.
4479  */
4480 static int
4481 lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4482 {
4483         struct nameidata nd;
4484         vfs_context_t ctx = vfs_context_current();
4485
4486         NDINIT(&nd, LOOKUP, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
4487             UIO_USERSPACE, path, ctx);
4488
4489         return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4490 }
4491
4492 /*
4493  * lstat_extended: Get file status; does not follow links; with extended security (ACL).
4494  *
4495  * Parameters:    p                       (ignored)
4496  *                uap                     User argument descriptor (see below)
4497  *                retval                  (ignored)
4498  *
4499  * Indirect:      uap->path               Path of file to get status from
4500  *                uap->ub                 User buffer (holds file status info)
4501  *                uap->xsecurity          ACL to get (extended security)
4502  *                uap->xsecurity_size     Size of ACL
4503  *
4504  * Returns:        0                      Success
4505  *                !0                      errno value
4506  *
4507  */
4508 int
4509 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
4510 {
4511         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4512 }
4513
4514 int
4515 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
4516 {
4517         return(lstat1(uap->path, uap->ub, 0, 0, 0));
4518 }
4519
4520 int
4521 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
4522 {
4523         return(lstat1(uap->path, uap->ub, 0, 0, 1));
4524 }
4525
4526 /*
4527  * lstat64_extended: Get file status; can handle large inode numbers; does not
4528  * follow links; with extended security (ACL).
4529  *
4530  * Parameters:    p                       (ignored)
4531  *                uap                     User argument descriptor (see below)
4532  *                retval                  (ignored)
4533  *
4534  * Indirect:      uap->path               Path of file to get status from
4535  *                uap->ub                 User buffer (holds file status info)
4536  *                uap->xsecurity          ACL to get (extended security)
4537  *                uap->xsecurity_size     Size of ACL
4538  *
4539  * Returns:        0                      Success
4540  *                !0                      errno value
4541  *
4542  */
4543 int
4544 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
4545 {
4546         return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4547 }
4548
4549 /*
4550  * Get configurable pathname variables.
4551  *
4552  * Returns:     0                       Success
4553  *      namei:???
4554  *      vn_pathconf:???
4555  *
4556  * Notes:       Global implementation  constants are intended to be
4557  *              implemented in this function directly; all other constants
4558  *              are per-FS implementation, and therefore must be handled in
4559  *              each respective FS, instead.
4560  *
4561  * XXX We implement some things globally right now that should actually be
4562  * XXX per-FS; we will need to deal with this at some point.
4563  */
4564 /* ARGSUSED */
4565 int
4566 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
4567 {
4568         int error;
4569         struct nameidata nd;
4570         vfs_context_t ctx = vfs_context_current();
4571
4572         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4573                 UIO_USERSPACE, uap->path, ctx);
4574         error = namei(&nd);
4575         if (error)
4576                 return (error);
4577
4578         error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
4579
4580         vnode_put(nd.ni_vp);
4581         nameidone(&nd);
4582         return (error);
4583 }
4584
4585 /*
4586  * Return target name of a symbolic link.
4587  */
4588 /* ARGSUSED */
4589 int
4590 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
4591 {
4592         vnode_t vp;
4593         uio_t auio;
4594         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
4595         int error;
4596         struct nameidata nd;
4597         vfs_context_t ctx = vfs_context_current();
4598         char uio_buf[ UIO_SIZEOF(1) ];
4599
4600         NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1,
4601                 UIO_USERSPACE, uap->path, ctx);
4602         error = namei(&nd);
4603         if (error)
4604                 return (error);
4605         vp = nd.ni_vp;
4606
4607         nameidone(&nd);
4608
4609         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
4610                                                                   &uio_buf[0], sizeof(uio_buf));
4611         uio_addiov(auio, uap->buf, uap->count);
4612         if (vp->v_type != VLNK)
4613                 error = EINVAL;
4614         else {
4615 #if CONFIG_MACF
4616                 error = mac_vnode_check_readlink(ctx,
4617                     vp);
4618 #endif
4619                 if (error == 0)
4620                         error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
4621                 if (error == 0)
4622                         error = VNOP_READLINK(vp, auio, ctx);
4623         }
4624         vnode_put(vp);
4625
4626         /* Safe: uio_resid() is bounded above by "count", and "count" is an int  */
4627         *retval = uap->count - (int)uio_resid(auio);
4628         return (error);
4629 }
4630
4631 /*
4632  * Change file flags.
4633  */
4634 static int
4635 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
4636 {
4637         struct vnode_attr va;
4638         kauth_action_t action;
4639         int error;
4640
4641         VATTR_INIT(&va);
4642         VATTR_SET(&va, va_flags, flags);
4643
4644 #if CONFIG_MACF
4645         error = mac_vnode_check_setflags(ctx, vp, flags);
4646         if (error)
4647                 goto out;
4648 #endif
4649
4650         /* request authorisation, disregard immutability */
4651         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4652                 goto out;
4653         /*
4654          * Request that the auth layer disregard those file flags it's allowed to when
4655          * authorizing this operation; we need to do this in order to be able to
4656          * clear immutable flags.
4657          */
4658         if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
4659                 goto out;
4660         error = vnode_setattr(vp, &va, ctx);
4661
4662         if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
4663                 error = ENOTSUP;
4664         }
4665 out:
4666         vnode_put(vp);
4667         return(error);
4668 }
4669
4670 /*
4671  * Change flags of a file given a path name.
4672  */
4673 /* ARGSUSED */
4674 int
4675 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
4676 {
4677         vnode_t vp;
4678         vfs_context_t ctx = vfs_context_current();
4679         int error;
4680         struct nameidata nd;
4681
4682         AUDIT_ARG(fflags, uap->flags);
4683         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4684                 UIO_USERSPACE, uap->path, ctx);
4685         error = namei(&nd);
4686         if (error)
4687                 return (error);
4688         vp = nd.ni_vp;
4689         nameidone(&nd);
4690
4691         error = chflags1(vp, uap->flags, ctx);
4692
4693         return(error);
4694 }
4695
4696 /*
4697  * Change flags of a file given a file descriptor.
4698  */
4699 /* ARGSUSED */
4700 int
4701 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
4702 {
4703         vnode_t vp;
4704         int error;
4705
4706         AUDIT_ARG(fd, uap->fd);
4707         AUDIT_ARG(fflags, uap->flags);
4708         if ( (error = file_vnode(uap->fd, &vp)) )
4709                 return (error);
4710
4711         if ((error = vnode_getwithref(vp))) {
4712                 file_drop(uap->fd);
4713                 return(error);
4714         }
4715
4716         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4717
4718         error = chflags1(vp, uap->flags, vfs_context_current());
4719
4720         file_drop(uap->fd);
4721         return (error);
4722 }
4723
4724 /*
4725  * Change security information on a filesystem object.
4726  *
4727  * Returns:     0                       Success
4728  *              EPERM                   Operation not permitted
4729  *              vnode_authattr:???      [anything vnode_authattr can return]
4730  *              vnode_authorize:???     [anything vnode_authorize can return]
4731  *              vnode_setattr:???       [anything vnode_setattr can return]
4732  *
4733  * Notes:       If vnode_authattr or vnode_authorize return EACCES, it will be
4734  *              translated to EPERM before being returned.
4735  */
4736 static int
4737 chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
4738 {
4739         kauth_action_t action;
4740         int error;
4741
4742         AUDIT_ARG(mode, vap->va_mode);
4743         /* XXX audit new args */
4744
4745 #if NAMEDSTREAMS
4746         /* chmod calls are not allowed for resource forks. */
4747         if (vp->v_flag & VISNAMEDSTREAM) {
4748                 return (EPERM);
4749         }
4750 #endif
4751
4752 #if CONFIG_MACF
4753         error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode);
4754         if (error)
4755                 return (error);
4756 #endif
4757
4758         /* make sure that the caller is allowed to set this security information */
4759         if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
4760             ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4761                 if (error == EACCES)
4762                         error = EPERM;
4763                 return(error);
4764         }
4765
4766         error = vnode_setattr(vp, vap, ctx);
4767
4768         return (error);
4769 }
4770
4771
4772 /*
4773  * Change mode of a file given a path name.
4774  *
4775  * Returns:     0                       Success
4776  *              namei:???               [anything namei can return]
4777  *              chmod2:???              [anything chmod2 can return]
4778  */
4779 static int
4780 chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
4781 {
4782         struct nameidata nd;
4783         int error;
4784
4785         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4786                 UIO_USERSPACE, path, ctx);
4787         if ((error = namei(&nd)))
4788                 return (error);
4789         error = chmod2(ctx, nd.ni_vp, vap);
4790         vnode_put(nd.ni_vp);
4791         nameidone(&nd);
4792         return(error);
4793 }
4794
4795 /*
4796  * chmod_extended: Change the mode of a file given a path name; with extended
4797  * argument list (including extended security (ACL)).
4798  *
4799  * Parameters:  p                       Process requesting the open
4800  *              uap                     User argument descriptor (see below)
4801  *              retval                  (ignored)
4802  *
4803  * Indirect:    uap->path               Path to object (same as 'chmod')
4804  *              uap->uid                UID to set
4805  *              uap->gid                GID to set
4806  *              uap->mode               File mode to set (same as 'chmod')
4807  *              uap->xsecurity          ACL to set (or delete)
4808  *
4809  * Returns:     0                       Success
4810  *              !0                      errno value
4811  *
4812  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
4813  *
4814  * XXX:         We should enummerate the possible errno values here, and where
4815  *              in the code they originated.
4816  */
4817 int
4818 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
4819 {
4820         int error;
4821         struct vnode_attr va;
4822         kauth_filesec_t xsecdst;
4823
4824         AUDIT_ARG(owner, uap->uid, uap->gid);
4825
4826         VATTR_INIT(&va);
4827         if (uap->mode != -1)
4828                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4829         if (uap->uid != KAUTH_UID_NONE)
4830                 VATTR_SET(&va, va_uid, uap->uid);
4831         if (uap->gid != KAUTH_GID_NONE)
4832                 VATTR_SET(&va, va_gid, uap->gid);
4833
4834         xsecdst = NULL;
4835         switch(uap->xsecurity) {
4836                 /* explicit remove request */
4837         case CAST_USER_ADDR_T((void *)1):       /* _FILESEC_REMOVE_ACL */
4838                 VATTR_SET(&va, va_acl, NULL);
4839                 break;
4840                 /* not being set */
4841         case USER_ADDR_NULL:
4842                 break;
4843         default:
4844                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4845                         return(error);
4846                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4847                 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
4848         }
4849
4850         error = chmod1(vfs_context_current(), uap->path, &va);
4851
4852         if (xsecdst != NULL)
4853                 kauth_filesec_free(xsecdst);
4854         return(error);
4855 }
4856
4857 /*
4858  * Returns:     0                       Success
4859  *              chmod1:???              [anything chmod1 can return]
4860  */
4861 int
4862 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
4863 {
4864         struct vnode_attr va;
4865
4866         VATTR_INIT(&va);
4867         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4868
4869         return(chmod1(vfs_context_current(), uap->path, &va));
4870 }
4871
4872 /*
4873  * Change mode of a file given a file descriptor.
4874  */
4875 static int
4876 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
4877 {
4878         vnode_t vp;
4879         int error;
4880
4881         AUDIT_ARG(fd, fd);
4882
4883         if ((error = file_vnode(fd, &vp)) != 0)
4884                 return (error);
4885         if ((error = vnode_getwithref(vp)) != 0) {
4886                 file_drop(fd);
4887                 return(error);
4888         }
4889         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4890
4891         error = chmod2(vfs_context_current(), vp, vap);
4892         (void)vnode_put(vp);
4893         file_drop(fd);
4894
4895         return (error);
4896 }
4897
4898 /*
4899  * fchmod_extended: Change mode of a file given a file descriptor; with
4900  * extended argument list (including extended security (ACL)).
4901  *
4902  * Parameters:    p                       Process requesting to change file mode
4903  *                uap                     User argument descriptor (see below)
4904  *                retval                  (ignored)
4905  *
4906  * Indirect:      uap->mode               File mode to set (same as 'chmod')
4907  *                uap->uid                UID to set
4908  *                uap->gid                GID to set
4909  *                uap->xsecurity          ACL to set (or delete)
4910  *                uap->fd                 File descriptor of file to change mode
4911  *
4912  * Returns:        0                      Success
4913  *                !0                      errno value
4914  *
4915  */
4916 int
4917 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
4918 {
4919         int error;
4920         struct vnode_attr va;
4921         kauth_filesec_t xsecdst;
4922
4923         AUDIT_ARG(owner, uap->uid, uap->gid);
4924
4925         VATTR_INIT(&va);
4926         if (uap->mode != -1)
4927                 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4928         if (uap->uid != KAUTH_UID_NONE)
4929                 VATTR_SET(&va, va_uid, uap->uid);
4930         if (uap->gid != KAUTH_GID_NONE)
4931                 VATTR_SET(&va, va_gid, uap->gid);
4932
4933         xsecdst = NULL;
4934         switch(uap->xsecurity) {
4935         case USER_ADDR_NULL:
4936                 VATTR_SET(&va, va_acl, NULL);
4937                 break;
4938         case CAST_USER_ADDR_T(-1):
4939                 break;
4940         default:
4941                 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4942                         return(error);
4943                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4944         }
4945
4946         error = fchmod1(p, uap->fd, &va);
4947
4948
4949         switch(uap->xsecurity) {
4950         case USER_ADDR_NULL:
4951         case CAST_USER_ADDR_T(-1):
4952                 break;
4953         default:
4954                 if (xsecdst != NULL)
4955                         kauth_filesec_free(xsecdst);
4956         }
4957         return(error);
4958 }
4959
4960 int
4961 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
4962 {
4963         struct vnode_attr va;
4964
4965         VATTR_INIT(&va);
4966         VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4967
4968         return(fchmod1(p, uap->fd, &va));
4969 }
4970
4971
4972 /*
4973  * Set ownership given a path name.
4974  */
4975 /* ARGSUSED */
4976 static int
4977 chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow)
4978 {
4979         vnode_t vp;
4980         struct vnode_attr va;
4981         int error;
4982         struct nameidata nd;
4983         kauth_action_t action;
4984
4985         AUDIT_ARG(owner, uap->uid, uap->gid);
4986
4987         NDINIT(&nd, LOOKUP, (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
4988                 UIO_USERSPACE, uap->path, ctx);
4989         error = namei(&nd);
4990         if (error)
4991                 return (error);
4992         vp = nd.ni_vp;
4993
4994         nameidone(&nd);
4995
4996         VATTR_INIT(&va);
4997         if (uap->uid != VNOVAL)
4998                 VATTR_SET(&va, va_uid, uap->uid);
4999         if (uap->gid != VNOVAL)
5000                 VATTR_SET(&va, va_gid, uap->gid);
5001
5002 #if CONFIG_MACF
5003         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5004         if (error)
5005                 goto out;
5006 #endif
5007
5008         /* preflight and authorize attribute changes */
5009         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5010                 goto out;
5011         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5012                 goto out;
5013         error = vnode_setattr(vp, &va, ctx);
5014
5015 out:
5016         /*
5017          * EACCES is only allowed from namei(); permissions failure should
5018          * return EPERM, so we need to translate the error code.
5019          */
5020         if (error == EACCES)
5021                 error = EPERM;
5022
5023         vnode_put(vp);
5024         return (error);
5025 }
5026
5027 int
5028 chown(__unused proc_t p, struct chown_args *uap, int32_t *retval)
5029 {
5030         return chown1(vfs_context_current(), uap, retval, 1);
5031 }
5032
5033 int
5034 lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval)
5035 {
5036         /* Argument list identical, but machine generated; cast for chown1() */
5037         return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
5038 }
5039
5040 /*
5041  * Set ownership given a file descriptor.
5042  */
5043 /* ARGSUSED */
5044 int
5045 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
5046 {
5047         struct vnode_attr va;
5048         vfs_context_t ctx = vfs_context_current();
5049         vnode_t vp;
5050         int error;
5051         kauth_action_t action;
5052
5053         AUDIT_ARG(owner, uap->uid, uap->gid);
5054         AUDIT_ARG(fd, uap->fd);
5055
5056         if ( (error = file_vnode(uap->fd, &vp)) )
5057                 return (error);
5058
5059         if ( (error = vnode_getwithref(vp)) ) {
5060                 file_drop(uap->fd);
5061                 return(error);
5062         }
5063         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5064
5065         VATTR_INIT(&va);
5066         if (uap->uid != VNOVAL)
5067                 VATTR_SET(&va, va_uid, uap->uid);
5068         if (uap->gid != VNOVAL)
5069                 VATTR_SET(&va, va_gid, uap->gid);
5070
5071 #if NAMEDSTREAMS
5072         /* chown calls are not allowed for resource forks. */
5073         if (vp->v_flag & VISNAMEDSTREAM) {
5074                 error = EPERM;
5075                 goto out;
5076         }
5077 #endif
5078
5079 #if CONFIG_MACF
5080         error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5081         if (error)
5082                 goto out;
5083 #endif
5084
5085         /* preflight and authorize attribute changes */
5086         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5087                 goto out;
5088         if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5089                 if (error == EACCES)
5090                         error = EPERM;
5091                 goto out;
5092         }
5093         error = vnode_setattr(vp, &va, ctx);
5094
5095 out:
5096         (void)vnode_put(vp);
5097         file_drop(uap->fd);
5098         return (error);
5099 }
5100
5101 static int
5102 getutimes(user_addr_t usrtvp, struct timespec *tsp)
5103 {
5104         int error;
5105
5106         if (usrtvp == USER_ADDR_NULL) {
5107                 struct timeval old_tv;
5108                 /* XXX Y2038 bug because of microtime argument */
5109                 microtime(&old_tv);
5110                 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
5111                 tsp[1] = tsp[0];
5112         } else {
5113                 if (IS_64BIT_PROCESS(current_proc())) {
5114                         struct user64_timeval tv[2];
5115                         error = copyin(usrtvp, (void *)tv, sizeof(tv));
5116                         if (error)
5117                                 return (error);
5118                         TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5119                         TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5120                 } else {
5121                         struct user32_timeval tv[2];
5122                         error = copyin(usrtvp, (void *)tv, sizeof(tv));
5123                         if (error)
5124                                 return (error);
5125                         TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5126                         TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5127                 }
5128         }
5129         return 0;
5130 }
5131
5132 static int
5133 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
5134         int nullflag)
5135 {
5136         int error;
5137         struct vnode_attr va;
5138         kauth_action_t action;
5139
5140         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5141
5142         VATTR_INIT(&va);
5143         VATTR_SET(&va, va_access_time, ts[0]);
5144         VATTR_SET(&va, va_modify_time, ts[1]);
5145         if (nullflag)
5146                 va.va_vaflags |= VA_UTIMES_NULL;
5147
5148 #if NAMEDSTREAMS
5149         /* utimes calls are not allowed for resource forks. */
5150         if (vp->v_flag & VISNAMEDSTREAM) {
5151                 error = EPERM;
5152                 goto out;
5153         }
5154 #endif
5155
5156 #if CONFIG_MACF
5157         error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
5158         if (error)
5159                 goto out;
5160 #endif
5161         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
5162                 if (!nullflag && error == EACCES)
5163                         error = EPERM;
5164                 goto out;
5165         }
5166
5167         /* since we may not need to auth anything, check here */
5168         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5169                 if (!nullflag && error == EACCES)
5170                         error = EPERM;
5171                 goto out;
5172         }
5173         error = vnode_setattr(vp, &va, ctx);
5174
5175 out:
5176         return error;
5177 }
5178
5179 /*
5180  * Set the access and modification times of a file.
5181  */
5182 /* ARGSUSED */
5183 int
5184 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
5185 {
5186         struct timespec ts[2];
5187         user_addr_t usrtvp;
5188         int error;
5189         struct nameidata nd;
5190         vfs_context_t ctx = vfs_context_current();
5191
5192         /*
5193          * AUDIT: Needed to change the order of operations to do the
5194          * name lookup first because auditing wants the path.
5195          */
5196         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
5197                 UIO_USERSPACE, uap->path, ctx);
5198         error = namei(&nd);
5199         if (error)
5200                 return (error);
5201         nameidone(&nd);
5202
5203         /*
5204          * Fetch the user-supplied time.  If usrtvp is USER_ADDR_NULL, we fetch
5205          * the current time instead.
5206          */
5207         usrtvp = uap->tptr;
5208         if ((error = getutimes(usrtvp, ts)) != 0)
5209                 goto out;
5210
5211         error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
5212
5213 out:
5214         vnode_put(nd.ni_vp);
5215         return (error);
5216 }
5217
5218 /*
5219  * Set the access and modification times of a file.
5220  */
5221 /* ARGSUSED */
5222 int
5223 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
5224 {
5225         struct timespec ts[2];
5226         vnode_t vp;
5227         user_addr_t usrtvp;
5228         int error;
5229
5230         AUDIT_ARG(fd, uap->fd);
5231         usrtvp = uap->tptr;
5232         if ((error = getutimes(usrtvp, ts)) != 0)
5233                 return (error);
5234         if ((error = file_vnode(uap->fd, &vp)) != 0)
5235                 return (error);
5236         if((error = vnode_getwithref(vp))) {
5237                 file_drop(uap->fd);
5238                 return(error);
5239         }
5240
5241         error =  setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
5242         vnode_put(vp);
5243         file_drop(uap->fd);
5244         return(error);
5245 }
5246
5247 /*
5248  * Truncate a file given its path name.
5249  */
5250 /* ARGSUSED */
5251 int
5252 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
5253 {
5254         vnode_t vp;
5255         struct vnode_attr va;
5256         vfs_context_t ctx = vfs_context_current();
5257         int error;
5258         struct nameidata nd;
5259         kauth_action_t action;
5260
5261         if (uap->length < 0)
5262                 return(EINVAL);
5263         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
5264                 UIO_USERSPACE, uap->path, ctx);
5265         if ((error = namei(&nd)))
5266                 return (error);
5267         vp = nd.ni_vp;
5268
5269         nameidone(&nd);
5270
5271         VATTR_INIT(&va);
5272         VATTR_SET(&va, va_data_size, uap->length);
5273
5274 #if CONFIG_MACF
5275         error = mac_vnode_check_truncate(ctx, NOCRED, vp);
5276         if (error)
5277                 goto out;
5278 #endif
5279
5280         if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5281                 goto out;
5282         if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5283                 goto out;
5284         error = vnode_setattr(vp, &va, ctx);
5285 out:
5286         vnode_put(vp);
5287         return (error);
5288 }
5289
5290 /*
5291  * Truncate a file given a file descriptor.
5292  */
5293 /* ARGSUSED */
5294 int
5295 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
5296 {
5297         vfs_context_t ctx = vfs_context_current();
5298         struct vnode_attr va;
5299         vnode_t vp;
5300         struct fileproc *fp;
5301         int error ;
5302         int fd = uap->fd;
5303
5304         AUDIT_ARG(fd, uap->fd);
5305         if (uap->length < 0)
5306                 return(EINVAL);
5307
5308         if ( (error = fp_lookup(p,fd,&fp,0)) ) {
5309                 return(error);
5310         }
5311
5312         if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
5313                 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
5314                 goto out;
5315         }
5316         if (fp->f_fglob->fg_type != DTYPE_VNODE)  {
5317                 error = EINVAL;
5318                 goto out;
5319         }
5320
5321         vp = (vnode_t)fp->f_fglob->fg_data;
5322
5323         if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
5324                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5325                 error = EINVAL;
5326                 goto out;
5327         }
5328
5329         if ((error = vnode_getwithref(vp)) != 0) {
5330                 goto out;
5331         }
5332
5333         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5334
5335 #if CONFIG_MACF
5336         error = mac_vnode_check_truncate(ctx,
5337             fp->f_fglob->fg_cred, vp);
5338         if (error) {
5339                 (void)vnode_put(vp);
5340                 goto out;
5341         }
5342 #endif
5343         VATTR_INIT(&va);
5344         VATTR_SET(&va, va_data_size, uap->length);
5345         error = vnode_setattr(vp, &va, ctx);
5346         (void)vnode_put(vp);
5347 out:
5348         file_drop(fd);
5349         return (error);
5350 }
5351
5352
5353 /*
5354  * Sync an open file with synchronized I/O _file_ integrity completion
5355  */
5356 /* ARGSUSED */
5357 int
5358 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
5359 {
5360         __pthread_testcancel(1);
5361         return(fsync_common(p, uap, MNT_WAIT));
5362 }
5363
5364
5365 /*
5366  * Sync an open file with synchronized I/O _file_ integrity completion
5367  *
5368  * Notes:       This is a legacy support function that does not test for
5369  *              thread cancellation points.
5370  */
5371 /* ARGSUSED */
5372 int
5373 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
5374 {
5375         return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
5376 }
5377
5378
5379 /*
5380  * Sync an open file with synchronized I/O _data_ integrity completion
5381  */
5382 /* ARGSUSED */
5383 int
5384 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
5385 {
5386         __pthread_testcancel(1);
5387         return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
5388 }
5389
5390
5391 /*
5392  * fsync_common
5393  *
5394  * Common fsync code to support both synchronized I/O file integrity completion
5395  * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
5396  *
5397  * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
5398  * will only guarantee that the file data contents are retrievable.  If
5399  * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
5400  * includes additional metadata unnecessary for retrieving the file data
5401  * contents, such as atime, mtime, ctime, etc., also be committed to stable
5402  * storage.
5403  *
5404  * Parameters:  p                               The process
5405  *              uap->fd                         The descriptor to synchronize
5406  *              flags                           The data integrity flags
5407  *
5408  * Returns:     int                             Success
5409  *      fp_getfvp:EBADF                         Bad file descriptor
5410  *      fp_getfvp:ENOTSUP                       fd does not refer to a vnode
5411  *      VNOP_FSYNC:???                          unspecified
5412  *
5413  * Notes:       We use struct fsync_args because it is a short name, and all
5414  *              caller argument structures are otherwise identical.
5415  */
5416 static int
5417 fsync_common(proc_t p, struct fsync_args *uap, int flags)
5418 {
5419         vnode_t vp;
5420         struct fileproc *fp;
5421         vfs_context_t ctx = vfs_context_current();
5422         int error;
5423
5424         AUDIT_ARG(fd, uap->fd);
5425
5426         if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
5427                 return (error);
5428         if ( (error = vnode_getwithref(vp)) ) {
5429                 file_drop(uap->fd);
5430                 return(error);
5431         }
5432
5433         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5434
5435         error = VNOP_FSYNC(vp, flags, ctx);
5436
5437 #if NAMEDRSRCFORK
5438         /* Sync resource fork shadow file if necessary. */
5439         if ((error == 0) &&
5440             (vp->v_flag & VISNAMEDSTREAM) &&
5441             (vp->v_parent != NULLVP) &&
5442             vnode_isshadow(vp) &&
5443             (fp->f_flags & FP_WRITTEN)) {
5444                 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
5445         }
5446 #endif
5447
5448         (void)vnode_put(vp);
5449         file_drop(uap->fd);
5450         return (error);
5451 }
5452
5453 /*
5454  * Duplicate files.  Source must be a file, target must be a file or
5455  * must not exist.
5456  *
5457  * XXX Copyfile authorisation checking is woefully inadequate, and will not
5458  *     perform inheritance correctly.
5459  */
5460 /* ARGSUSED */
5461 int
5462 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
5463 {
5464         vnode_t tvp, fvp, tdvp, sdvp;
5465         struct nameidata fromnd, tond;
5466         int error;
5467         vfs_context_t ctx = vfs_context_current();
5468
5469         /* Check that the flags are valid. */
5470
5471         if (uap->flags & ~CPF_MASK) {
5472                 return(EINVAL);
5473         }
5474
5475         NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1,
5476                 UIO_USERSPACE, uap->from, ctx);
5477         if ((error = namei(&fromnd)))
5478                 return (error);
5479         fvp = fromnd.ni_vp;
5480
5481         NDINIT(&tond, CREATE,  LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
5482             UIO_USERSPACE, uap->to, ctx);
5483         if ((error = namei(&tond))) {
5484                 goto out1;
5485         }
5486         tdvp = tond.ni_dvp;
5487         tvp = tond.ni_vp;
5488
5489         if (tvp != NULL) {
5490                 if (!(uap->flags & CPF_OVERWRITE)) {
5491                         error = EEXIST;
5492                         goto out;
5493                 }
5494         }
5495         if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
5496                 error = EISDIR;
5497                 goto out;
5498         }
5499
5500         if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5501                 goto out;
5502
5503         if (fvp == tdvp)
5504                 error = EINVAL;
5505         /*
5506          * If source is the same as the destination (that is the
5507          * same inode number) then there is nothing to do.
5508          * (fixed to have POSIX semantics - CSM 3/2/98)
5509          */
5510         if (fvp == tvp)
5511                 error = -1;
5512         if (!error)
5513                 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
5514 out:
5515         sdvp = tond.ni_startdir;
5516         /*
5517          * nameidone has to happen before we vnode_put(tdvp)
5518          * since it may need to release the fs_nodelock on the tdvp
5519          */
5520         nameidone(&tond);
5521
5522         if (tvp)
5523                 vnode_put(tvp);
5524         vnode_put(tdvp);
5525         vnode_put(sdvp);
5526 out1:
5527         vnode_put(fvp);
5528
5529         if (fromnd.ni_startdir)
5530                 vnode_put(fromnd.ni_startdir);
5531         nameidone(&fromnd);
5532
5533         if (error == -1)
5534                 return (0);
5535         return (error);
5536 }
5537
5538
5539 /*
5540  * Rename files.  Source and destination must either both be directories,
5541  * or both not be directories.  If target is a directory, it must be empty.
5542  */
5543 /* ARGSUSED */
5544 int
5545 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
5546 {
5547         vnode_t tvp, tdvp;
5548         vnode_t fvp, fdvp;
5549         struct nameidata fromnd, tond;
5550         vfs_context_t ctx = vfs_context_current();
5551         int error;
5552         int do_retry;
5553         int mntrename;
5554         int need_event;
5555         const char *oname;
5556         char *from_name = NULL, *to_name = NULL;
5557         int from_len=0, to_len=0;
5558         int holding_mntlock;
5559         mount_t locked_mp = NULL;
5560         vnode_t oparent;
5561 #if CONFIG_FSE
5562         fse_info from_finfo, to_finfo;
5563 #endif
5564         int from_truncated=0, to_truncated;
5565
5566         holding_mntlock = 0;
5567     do_retry = 0;
5568 retry:
5569         fvp = tvp = NULL;
5570         fdvp = tdvp = NULL;
5571         mntrename = FALSE;
5572
5573         NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx);
5574
5575         if ( (error = namei(&fromnd)) )
5576                 goto out1;
5577         fdvp = fromnd.ni_dvp;
5578         fvp  = fromnd.ni_vp;
5579
5580 #if CONFIG_MACF
5581         error = mac_vnode_check_rename_from(ctx, fdvp, fvp, &fromnd.ni_cnd);
5582         if (error)
5583                 goto out1;
5584 #endif
5585
5586         NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK , UIO_USERSPACE, uap->to, ctx);
5587         if (fvp->v_type == VDIR)
5588                 tond.ni_cnd.cn_flags |= WILLBEDIR;
5589
5590         if ( (error = namei(&tond)) ) {
5591                 /*
5592                  * Translate error code for rename("dir1", "dir2/.").
5593                  */
5594                 if (error == EISDIR && fvp->v_type == VDIR)
5595                         error = EINVAL;
5596                 goto out1;
5597         }
5598         tdvp = tond.ni_dvp;
5599         tvp  = tond.ni_vp;
5600
5601 #if CONFIG_MACF
5602         error = mac_vnode_check_rename_to(ctx,
5603             tdvp, tvp, fdvp == tdvp, &tond.ni_cnd);
5604         if (error)
5605                 goto out1;
5606 #endif
5607
5608         if (tvp != NULL) {
5609                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
5610                         error = ENOTDIR;
5611                         goto out1;
5612                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
5613                         error = EISDIR;
5614                         goto out1;
5615                 }
5616         }
5617         if (fvp == tdvp) {
5618                 error = EINVAL;
5619                 goto out1;
5620         }
5621         /*
5622          * If the source and destination are the same (i.e. they're
5623          * links to the same vnode) and the target file system is
5624          * case sensitive, then there is nothing to do.
5625          */
5626         if (fvp == tvp) {
5627                 int pathconf_val;
5628
5629                 /*
5630                  * Note: if _PC_CASE_SENSITIVE selector isn't supported,
5631                  * then assume that this file system is case sensitive.
5632                  */
5633                 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
5634                     pathconf_val != 0) {
5635                         goto out1;
5636                 }
5637         }
5638
5639         /*
5640          * Authorization.
5641          *
5642          * If tvp is a directory and not the same as fdvp, or tdvp is not
5643          * the same as fdvp, the node is moving between directories and we
5644          * need rights to remove from the old and add to the new.
5645          *
5646          * If tvp already exists and is not a directory, we need to be
5647          * allowed to delete it.
5648          *
5649          * Note that we do not inherit when renaming.
5650          *
5651          * XXX This needs to be revisited to implement the deferred-inherit bit
5652          */
5653         {
5654                 int moving = 0;
5655
5656                 error = 0;
5657                 if ((tvp != NULL) && vnode_isdir(tvp)) {
5658                         if (tvp != fdvp)
5659                                 moving = 1;
5660                 } else if (tdvp != fdvp) {
5661                         moving = 1;
5662                 }
5663                 /*
5664                  * must have delete rights to remove the old name even in
5665                  * the simple case of fdvp == tdvp.
5666                  *
5667                  * If fvp is a directory, and we are changing its parent,
5668                  * then we also need rights to rewrite its ".." entry as well.
5669                  */
5670                 if (vnode_isdir(fvp)) {
5671                         if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
5672                                 goto auth_exit;
5673                 } else {
5674                 if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
5675                         goto auth_exit;
5676                 }
5677                 if (moving) {
5678                         /* moving into tdvp or tvp, must have rights to add */
5679                         if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
5680                                  NULL,
5681                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
5682                                  ctx)) != 0) {
5683                 /*
5684                  * We could encounter a race where after doing the namei, tvp stops
5685                  * being valid. If so, simply re-drive the rename call from the
5686                  * top.
5687                  */
5688                  if (error == ENOENT) {
5689                      do_retry = 1;
5690                  }
5691                                 goto auth_exit;
5692                         }
5693                 } else {
5694                         /* node staying in same directory, must be allowed to add new name */
5695                         if ((error = vnode_authorize(fdvp, NULL,
5696                                  vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5697                                 goto auth_exit;
5698                 }
5699                 /* overwriting tvp */
5700                 if ((tvp != NULL) && !vnode_isdir(tvp) &&
5701                     ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
5702             /*
5703              * We could encounter a race where after doing the namei, tvp stops
5704              * being valid. If so, simply re-drive the rename call from the
5705              * top.
5706              */
5707             if (error == ENOENT) {
5708                 do_retry = 1;
5709             }
5710                         goto auth_exit;
5711                 }
5712
5713                 /* XXX more checks? */
5714
5715 auth_exit:
5716                 /* authorization denied */
5717                 if (error != 0)
5718                         goto out1;
5719         }
5720         /*
5721          * Allow the renaming of mount points.
5722          * - target must not exist
5723          * - target must reside in the same directory as source
5724          * - union mounts cannot be renamed
5725          * - "/" cannot be renamed
5726          */
5727         if ((fvp->v_flag & VROOT) &&
5728             (fvp->v_type == VDIR) &&
5729             (tvp == NULL)  &&
5730             (fvp->v_mountedhere == NULL)  &&
5731             (fdvp == tdvp)  &&
5732             ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
5733             (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
5734                 vnode_t coveredvp;
5735
5736                 /* switch fvp to the covered vnode */
5737                 coveredvp = fvp->v_mount->mnt_vnodecovered;
5738                 if ( (vnode_getwithref(coveredvp)) ) {
5739                         error = ENOENT;
5740                         goto out1;
5741                 }
5742                 vnode_put(fvp);
5743
5744                 fvp = coveredvp;
5745                 mntrename = TRUE;
5746         }
5747         /*
5748          * Check for cross-device rename.
5749          */
5750         if ((fvp->v_mount != tdvp->v_mount) ||
5751             (tvp && (fvp->v_mount != tvp->v_mount))) {
5752                 error = EXDEV;
5753                 goto out1;
5754         }
5755         /*
5756          * Avoid renaming "." and "..".
5757          */
5758         if (fvp->v_type == VDIR &&
5759             ((fdvp == fvp) ||
5760              (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') ||
5761              ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) {
5762                 error = EINVAL;
5763                 goto out1;
5764         }
5765         /*
5766          * The following edge case is caught here:
5767          * (to cannot be a descendent of from)
5768          *
5769          *       o fdvp
5770          *      /
5771          *     /
5772          *    o fvp
5773          *     \
5774          *      \
5775          *       o tdvp
5776          *      /
5777          *     /
5778          *    o tvp
5779          */
5780         if (tdvp->v_parent == fvp) {
5781                 error = EINVAL;
5782                 goto out1;
5783         }
5784
5785         /*
5786          * If source is the same as the destination (that is the
5787          * same inode number) then there is nothing to do...
5788          * EXCEPT if the underlying file system supports case
5789          * insensitivity and is case preserving.  In this case
5790          * the file system needs to handle the special case of
5791          * getting the same vnode as target (fvp) and source (tvp).
5792          *
5793          * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
5794          * and _PC_CASE_PRESERVING can have this exception, and they need to
5795          * handle the special case of getting the same vnode as target and
5796          * source.  NOTE: Then the target is unlocked going into vnop_rename,
5797          * so not to cause locking problems. There is a single reference on tvp.
5798          *
5799          * NOTE - that fvp == tvp also occurs if they are hard linked and
5800          * that correct behaviour then is just to return success without doing
5801          * anything.
5802          */
5803         if (fvp == tvp && fdvp == tdvp) {
5804                 if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
5805                     !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
5806                           fromnd.ni_cnd.cn_namelen)) {
5807                         goto out1;
5808                 }
5809         }
5810
5811         if (holding_mntlock && fvp->v_mount != locked_mp) {
5812                 /*
5813                  * we're holding a reference and lock
5814                  * on locked_mp, but it no longer matches
5815                  * what we want to do... so drop our hold
5816                  */
5817                 mount_unlock_renames(locked_mp);
5818                 mount_drop(locked_mp, 0);
5819                 holding_mntlock = 0;
5820         }
5821         if (tdvp != fdvp && fvp->v_type == VDIR) {
5822                 /*
5823                  * serialize renames that re-shape
5824                  * the tree... if holding_mntlock is
5825                  * set, then we're ready to go...
5826                  * otherwise we
5827                  * first need to drop the iocounts
5828                  * we picked up, second take the
5829                  * lock to serialize the access,
5830                  * then finally start the lookup
5831                  * process over with the lock held
5832                  */
5833                 if (!holding_mntlock) {
5834                         /*
5835                          * need to grab a reference on
5836                          * the mount point before we
5837                          * drop all the iocounts... once
5838                          * the iocounts are gone, the mount
5839                          * could follow
5840                          */
5841                         locked_mp = fvp->v_mount;
5842                         mount_ref(locked_mp, 0);
5843
5844                         /*
5845                          * nameidone has to happen before we vnode_put(tvp)
5846                          * since it may need to release the fs_nodelock on the tvp
5847                          */
5848                         nameidone(&tond);
5849
5850                         if (tvp)
5851                                 vnode_put(tvp);
5852                         vnode_put(tdvp);
5853
5854                         /*
5855                          * nameidone has to happen before we vnode_put(fdvp)
5856                          * since it may need to release the fs_nodelock on the fvp
5857                          */
5858                         nameidone(&fromnd);
5859
5860                         vnode_put(fvp);
5861                         vnode_put(fdvp);
5862
5863                         mount_lock_renames(locked_mp);
5864                         holding_mntlock = 1;
5865
5866                         goto retry;
5867                 }
5868         } else {
5869                 /*
5870                  * when we dropped the iocounts to take
5871                  * the lock, we allowed the identity of
5872                  * the various vnodes to change... if they did,
5873                  * we may no longer be dealing with a rename
5874                  * that reshapes the tree... once we're holding
5875                  * the iocounts, the vnodes can't change type
5876                  * so we're free to drop the lock at this point
5877                  * and continue on
5878                  */
5879                 if (holding_mntlock) {
5880                         mount_unlock_renames(locked_mp);
5881                         mount_drop(locked_mp, 0);
5882                         holding_mntlock = 0;
5883                 }
5884         }
5885         // save these off so we can later verify that fvp is the same
5886         oname   = fvp->v_name;
5887         oparent = fvp->v_parent;
5888
5889 #if CONFIG_FSE
5890         need_event = need_fsevent(FSE_RENAME, fvp);
5891         if (need_event) {
5892                 get_fse_info(fvp, &from_finfo, ctx);
5893
5894                 if (tvp) {
5895                         get_fse_info(tvp, &to_finfo, ctx);
5896                 }
5897         }
5898 #else
5899         need_event = 0;
5900 #endif /* CONFIG_FSE */
5901
5902         if (need_event || kauth_authorize_fileop_has_listeners()) {
5903                 GET_PATH(from_name);
5904                 if (from_name == NULL) {
5905                         error = ENOMEM;
5906                         goto out1;
5907                 }
5908
5909                 from_len = safe_getpath(fdvp, fromnd.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
5910
5911                 GET_PATH(to_name);
5912                 if (to_name == NULL) {
5913                         error = ENOMEM;
5914                         goto out1;
5915                 }
5916
5917                 to_len = safe_getpath(tdvp, tond.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
5918         }
5919
5920         error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd,
5921                             tdvp, tvp, &tond.ni_cnd,
5922                             ctx);
5923
5924         if (holding_mntlock) {
5925                 /*
5926                  * we can drop our serialization
5927                  * lock now
5928                  */
5929                 mount_unlock_renames(locked_mp);
5930                 mount_drop(locked_mp, 0);
5931                 holding_mntlock = 0;
5932         }
5933         if (error) {
5934         /*
5935          * We may encounter a race in the VNOP where the destination didn't
5936          * exist when we did the namei, but it does by the time we go and
5937          * try to create the entry. In this case, we should re-drive this rename
5938          * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
5939                  * but other filesystems susceptible to this race could return it, too.
5940          */
5941         if (error == ERECYCLE) {
5942             do_retry = 1;
5943         }
5944
5945                 goto out1;
5946         }
5947
5948         /* call out to allow 3rd party notification of rename.
5949          * Ignore result of kauth_authorize_fileop call.
5950          */
5951         kauth_authorize_fileop(vfs_context_ucred(ctx),
5952                         KAUTH_FILEOP_RENAME,
5953                         (uintptr_t)from_name, (uintptr_t)to_name);
5954
5955 #if CONFIG_FSE
5956         if (from_name != NULL && to_name != NULL) {
5957                 if (from_truncated || to_truncated) {
5958                         // set it here since only the from_finfo gets reported up to user space
5959                         from_finfo.mode |= FSE_TRUNCATED_PATH;
5960                 }
5961                 if (tvp) {
5962                         add_fsevent(FSE_RENAME, ctx,
5963                                     FSE_ARG_STRING, from_len, from_name,
5964                                     FSE_ARG_FINFO, &from_finfo,
5965                                     FSE_ARG_STRING, to_len, to_name,
5966                                     FSE_ARG_FINFO, &to_finfo,
5967                                     FSE_ARG_DONE);
5968                 } else {
5969                         add_fsevent(FSE_RENAME, ctx,
5970                                     FSE_ARG_STRING, from_len, from_name,
5971                                     FSE_ARG_FINFO, &from_finfo,
5972                                     FSE_ARG_STRING, to_len, to_name,
5973                                     FSE_ARG_DONE);
5974                 }
5975         }
5976 #endif /* CONFIG_FSE */
5977
5978         /*
5979          * update filesystem's mount point data
5980          */
5981         if (mntrename) {
5982                 char *cp, *pathend, *mpname;
5983                 char * tobuf;
5984                 struct mount *mp;
5985                 int maxlen;
5986                 size_t len = 0;
5987
5988                 mp = fvp->v_mountedhere;
5989
5990                 if (vfs_busy(mp, LK_NOWAIT)) {
5991                         error = EBUSY;
5992                         goto out1;
5993                 }
5994                 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
5995
5996                 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
5997                 if (!error) {
5998                         /* find current mount point prefix */
5999                         pathend = &mp->mnt_vfsstat.f_mntonname[0];
6000                         for (cp = pathend; *cp != '\0'; ++cp) {
6001                                 if (*cp == '/')
6002                                         pathend = cp + 1;
6003                         }
6004                         /* find last component of target name */
6005                         for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
6006                                 if (*cp == '/')
6007                                         mpname = cp + 1;
6008                         }
6009                         /* append name to prefix */
6010                         maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
6011                         bzero(pathend, maxlen);
6012                         strlcpy(pathend, mpname, maxlen);
6013                 }
6014                 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
6015
6016                 vfs_unbusy(mp);
6017         }
6018         /*
6019          * fix up name & parent pointers.  note that we first
6020          * check that fvp has the same name/parent pointers it
6021          * had before the rename call... this is a 'weak' check
6022          * at best...
6023          */
6024         if (oname == fvp->v_name && oparent == fvp->v_parent) {
6025                 int update_flags;
6026
6027                 update_flags = VNODE_UPDATE_NAME;
6028
6029                 if (fdvp != tdvp)
6030                         update_flags |= VNODE_UPDATE_PARENT;
6031
6032                 vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
6033         }
6034 out1:
6035         if (to_name != NULL) {
6036                 RELEASE_PATH(to_name);
6037                 to_name = NULL;
6038         }
6039         if (from_name != NULL) {
6040                 RELEASE_PATH(from_name);
6041                 from_name = NULL;
6042         }
6043         if (holding_mntlock) {
6044                 mount_unlock_renames(locked_mp);
6045                 mount_drop(locked_mp, 0);
6046                 holding_mntlock = 0;
6047         }
6048         if (tdvp) {
6049                 /*
6050                  * nameidone has to happen before we vnode_put(tdvp)
6051                  * since it may need to release the fs_nodelock on the tdvp
6052                  */
6053                 nameidone(&tond);
6054
6055                 if (tvp)
6056                         vnode_put(tvp);
6057                 vnode_put(tdvp);
6058         }
6059         if (fdvp) {
6060                 /*
6061                  * nameidone has to happen before we vnode_put(fdvp)
6062                  * since it may need to release the fs_nodelock on the fdvp
6063                  */
6064                 nameidone(&fromnd);
6065
6066                 if (fvp)
6067                         vnode_put(fvp);
6068                 vnode_put(fdvp);
6069         }
6070
6071     /*
6072      * If things changed after we did the namei, then we will re-drive
6073      * this rename call from the top.
6074      */
6075         if(do_retry) {
6076         do_retry = 0;
6077                 goto retry;
6078         }
6079
6080         return (error);
6081 }
6082
6083 /*
6084  * Make a directory file.
      *
      * Shared implementation behind mkdir() and mkdir_extended(): looks up
      * 'path' for creation, fails if something already exists under that
      * name, runs the MAC (when configured) and kauth checks against the
      * parent directory, and then has vn_create() make the directory.
      *
      * Parameters:  ctx                     Context handed to the lookup,
      *                                      authorization and create calls
      *              path                    User-space address of the pathname
      *              vap                     Attributes for the new directory;
      *                                      va_type is set to VDIR in here
6085  *
6086  * Returns:     0                       Success
6087  *              EEXIST                  The last path component already exists
6088  *      namei:???
      *      mac_vnode_check_create:???      (CONFIG_MACF only)
6089  *      vnode_authorize:???
6090  *      vn_create:???
6091  */
6092 /* ARGSUSED */
6093 static int
6094 mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
6095 {
6096         vnode_t vp, dvp;
6097         int error;
6098         int update_flags = 0;
6099         struct nameidata nd;
6100
6101         AUDIT_ARG(mode, vap->va_mode);
             /* CREATE lookup of a user-space path, asking for the parent as well */
6102         NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
6103                 UIO_USERSPACE, path, ctx);
6104         nd.ni_cnd.cn_flags |= WILLBEDIR;
6105         error = namei(&nd);
6106         if (error)
6107                 return (error);
             /* from here on every exit goes through 'out' to drop dvp (and vp) */
6108         dvp = nd.ni_dvp;
6109         vp = nd.ni_vp;
6110
             /* the lookup found an existing object under this name */
6111         if (vp != NULL) {
6112                 error = EEXIST;
6113                 goto out;
6114         }
6115
6116         VATTR_SET(vap, va_type, VDIR);
6117
6118 #if CONFIG_MACF
6119         error = mac_vnode_check_create(ctx,
6120             nd.ni_dvp, &nd.ni_cnd, vap);
6121         if (error)
6122                 goto out;
6123 #endif
6124
6125         /* authorize addition of a directory to the parent */
6126         if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
6127                 goto out;
6128
6129
6130         /* make the directory */
6131         if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0)
6132                 goto out;
6133
6134         // Make sure the name & parent pointers are hooked up
             // (only the pieces that vn_create left unset are filled in)
6135         if (vp->v_name == NULL)
6136                 update_flags |= VNODE_UPDATE_NAME;
6137         if (vp->v_parent == NULLVP)
6138                 update_flags |= VNODE_UPDATE_PARENT;
6139
6140         if (update_flags)
6141                 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
6142
6143 #if CONFIG_FSE
             /* report the new directory to fsevents */
6144         add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
6145 #endif
6146
6147 out:
6148         /*
6149          * nameidone has to happen before we vnode_put(dvp)
6150          * since it may need to release the fs_nodelock on the dvp
6151          */
6152         nameidone(&nd);
6153
             /* vp is either the pre-existing object (EEXIST) or the new directory */
6154         if (vp)
6155                 vnode_put(vp);
6156         vnode_put(dvp);
6157
6158         return (error);
6159 }
6160
6161 /*
6162  * mkdir_extended: Create a directory, optionally attaching an ACL taken
      *                 from a caller-supplied filesec.
6163  *
6164  * Parameters:    p                       Calling process; its umask filters the mode
6165  *                uap                     User argument descriptor (see below)
6166  *                retval                  (ignored)
6167  *
6168  * Indirect:      uap->path               Path of directory to create
6169  *                uap->mode               Access permissions to set
6170  *                uap->xsecurity          Filesec carrying the ACL to set, or
      *                                        USER_ADDR_NULL for none
      *                uap->uid, uap->gid      Recorded for audit only in this routine
6171  *
6172  * Returns:        0                      Success
6173  *                !0                      Error from kauth_copyinfilesec() or mkdir1()
6174  *
6175  */
6176 int
6177 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
6178 {
6179         struct vnode_attr dir_attrs;
6180         kauth_filesec_t fsec = NULL;
6181         int error;
6182
6183         AUDIT_ARG(owner, uap->uid, uap->gid);
6184
             /* bring the caller's filesec into the kernel, when there is one */
6185         if (uap->xsecurity != USER_ADDR_NULL) {
6186                 error = kauth_copyinfilesec(uap->xsecurity, &fsec);
6187                 if (error != 0) {
6188                         return error;
                     }
             }
6189
6190         VATTR_INIT(&dir_attrs);
6191         VATTR_SET(&dir_attrs, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6192         if (fsec != NULL) {
6193                 VATTR_SET(&dir_attrs, va_acl, &fsec->fsec_acl);
             }
6194
6195         error = mkdir1(vfs_context_current(), uap->path, &dir_attrs);
             /* the copied-in filesec is ours to free whatever mkdir1 returned */
6196         if (fsec != NULL) {
6197                 kauth_filesec_free(fsec);
             }
6198         return error;
6199 }
6200
     /*
      * mkdir: Create a directory.
      *
      * Parameters:    p                       Calling process; its umask filters the mode
      *                uap                     User argument descriptor (see below)
      *                retval                  (ignored)
      *
      * Indirect:      uap->path               Path of directory to create
      *                uap->mode               Access permissions to set
      *
      * Returns:        0                      Success
      *                !0                      Error from mkdir1()
      */
6201 int
6202 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
6203 {
6204         struct vnode_attr dir_attrs;
6205
6206         VATTR_INIT(&dir_attrs);
             /* request only the permission bits that survive the process umask */
6207         VATTR_SET(&dir_attrs, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6208
6209         return mkdir1(vfs_context_current(), uap->path, &dir_attrs);
6210 }
6211
6212 /*
6213  * Remove a directory file.
      *
      * Parameters:    p                       (unused)
      *                uap                     User argument descriptor (see below)
      *                retval                  (ignored)
      *
      * Indirect:      uap->path               Path of directory to remove
      *
      * Returns:        0                      Success
      *                ENOTDIR                 The target is not a directory
      *                EINVAL                  The target is "." (parent == target)
      *                EBUSY                   The target is the root of a mounted
      *                                        filesystem
      *                ENOENT                  On a restart, the name resolved to a
      *                                        vnode with a different v_id
      *                ENOMEM                  No path buffer for event reporting
      *      namei:???
      *      mac_vnode_check_unlink:???        (CONFIG_MACF only)
      *      vnode_authorize:???
      *      VNOP_RMDIR:???
      *      rmdir_remove_orphaned_appleDouble:???
6214  */
6215 /* ARGSUSED */
6216 int
6217 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
6218 {
6219         vnode_t vp, dvp;
6220         int error;
6221         struct nameidata nd;
6222         vfs_context_t ctx = vfs_context_current();
6223
6224         int restart_flag;
6225         uint32_t oldvp_id = UINT32_MAX;
6226
6227         /*
6228          * This loop exists to restart rmdir in the unlikely case that two
6229          * processes are simultaneously trying to remove the same directory
6230          * containing orphaned appleDouble files.
6231          */
6232         do {
6233                 restart_flag = 0;
6234
6235                 NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1,
6236                                 UIO_USERSPACE, uap->path, ctx);
6237                 error = namei(&nd);
6238                 if (error)
6239                         return (error);
6240
6241                 dvp = nd.ni_dvp;
6242                 vp = nd.ni_vp;
6243
6244
6245                 /*
6246                  * If being restarted check if the new vp
6247                  * still has the same v_id.
6248                  */
6249                 if (oldvp_id != UINT32_MAX && oldvp_id != vp->v_id) {
6250                         error = ENOENT;
6251                         goto out;
6252                 }
6253
6254                 if (vp->v_type != VDIR) {
6255                         /*
6256                          * rmdir only deals with directories
6257                          */
6258                         error = ENOTDIR;
6259                 } else if (dvp == vp) {
6260                         /*
6261                          * No rmdir "." please.
6262                          */
6263                         error = EINVAL;
6264                 } else if (vp->v_flag & VROOT) {
6265                         /*
6266                          * The root of a mounted filesystem cannot be deleted.
6267                          */
6268                         error = EBUSY;
6269                 } else {
6270 #if CONFIG_MACF
6271                         error = mac_vnode_check_unlink(ctx, dvp,
6272                                         vp, &nd.ni_cnd);
6273                         if (!error)
6274 #endif
6275                                 error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, ctx);
6276                 }
6277                 if (!error) {
6278                         char     *path = NULL;
6279                         int       len=0;
6280                         int has_listeners = 0;
6281                         int need_event = 0;
6282                         int truncated = 0;
6283 #if CONFIG_FSE
6284                         fse_info  finfo;
6285
6286                         need_event = need_fsevent(FSE_DELETE, dvp);
6287                         if (need_event) {
6288                                 get_fse_info(vp, &finfo, ctx);
6289                         }
6290 #endif
6291                         has_listeners = kauth_authorize_fileop_has_listeners();
                             /* the path is only needed when somebody will be notified */
6292                         if (need_event || has_listeners) {
6293                                 GET_PATH(path);
6294                                 if (path == NULL) {
6295                                         error = ENOMEM;
6296                                         goto out;
6297                                 }
6298
6299                                 len = safe_getpath(vp, NULL, path, MAXPATHLEN, &truncated);
6300 #if CONFIG_FSE
                                     /*
                                      * finfo is only filled in when need_event is
                                      * set (the path may have been fetched purely
                                      * for a kauth listener), so do not touch it
                                      * otherwise.
                                      */
6301                                 if (truncated && need_event) {
6302                                         finfo.mode |= FSE_TRUNCATED_PATH;
6303                                 }
6304 #endif
6305                         }
6306
6307                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
6308
6309                         /*
6310                          * Special case to remove orphaned AppleDouble
6311                          * files. I don't like putting this in the kernel,
6312                          * but carbon does not like putting this in carbon either,
6313                          * so here we are.
6314                          */
6315                         if (error == ENOTEMPTY) {
6316                                 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
6317                                 if (error == EBUSY) {
                                             /* remember which vnode we saw, for the restart check */
6318                                         oldvp_id = vp->v_id;
                                             /*
                                              * 'path' is scoped to this block and is
                                              * not visible at 'out', so it has to be
                                              * released here; otherwise the buffer is
                                              * leaked on every EBUSY / restart pass.
                                              */
                                             if (path != NULL)
                                                     RELEASE_PATH(path);
6319                                         goto out;
6320                                 }
6321
6322
6323                                 /*
6324                                  * Assuming everything went well, we will try the RMDIR again
6325                                  */
6326                                 if (!error)
6327                                         error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
6328                         }
6329
6330                         /*
6331                          * Call out to allow 3rd party notification of delete.
6332                          * Ignore result of kauth_authorize_fileop call.
6333                          */
6334                         if (!error) {
6335                                 if (has_listeners) {
6336                                         kauth_authorize_fileop(vfs_context_ucred(ctx),
6337                                                         KAUTH_FILEOP_DELETE,
6338                                                         (uintptr_t)vp,
6339                                                         (uintptr_t)path);
6340                                 }
6341
6342                                 if (vp->v_flag & VISHARDLINK) {
6343                                     // see the comment in unlink1() about why we update
6344                                     // the parent of a hard link when it is removed
6345                                     vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
6346                                 }
6347
6348 #if CONFIG_FSE
6349                                 if (need_event) {
6350                                         add_fsevent(FSE_DELETE, ctx,
6351                                                         FSE_ARG_STRING, len, path,
6352                                                         FSE_ARG_FINFO, &finfo,
6353                                                         FSE_ARG_DONE);
6354                                 }
6355 #endif
6356                         }
6357                         if (path != NULL)
6358                                 RELEASE_PATH(path);
6359                 }
6360
6361 out:
6362                 /*
6363                  * nameidone has to happen before we vnode_put(dvp)
6364                  * since it may need to release the fs_nodelock on the dvp
6365                  */
6366                 nameidone(&nd);
6367
6368                 vnode_put(dvp);
6369                 vnode_put(vp);
6370
                     /*
                      * From here on vp is used only as a sleep/wakeup channel
                      * address; it is not dereferenced after the vnode_put above.
                      */
6371                 if (restart_flag == 0) {
6372                         wakeup_one((caddr_t)vp);
6373                         return (error);
6374                 }
6375                 tsleep(vp, PVFS, "rm AD", 1);
6376
6377         } while (restart_flag != 0);
6378
6379         return (error);
6380
6381 }
6382
6383 /* Get direntry length padded to 8 byte alignment */
6384 #define DIRENT64_LEN(namlen) \
6385         ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
6386
     /*
      * vnode_readdir64: Read directory entries in 'struct direntry' format.
      *
      * If the filesystem advertises VFC_VFSREADDIR_EXTENDED the request is
      * handed to VNOP_READDIR unchanged.  Otherwise 'struct dirent' records
      * are read into a temporary kernel buffer (with flags == 0), converted
      * one at a time, and copied out through 'uio'.
      *
      * Parameters:    vp                      Directory to read
      *                uio                     Destination for the direntry records
      *                flags                   Passed through to VNOP_READDIR only
      *                                        in the native case
      *                eofflag, numdirent      Passed through to VNOP_READDIR
      *                ctxp                    Context for VNOP_READDIR
      *
      * Returns:        0                      Success
      *                ENOMEM                  No memory for the conversion buffer
      *                EIO                     The filesystem returned a malformed
      *                                        dirent record
      *      VNOP_READDIR:???
      *      uiomove:???
      */
6387 static errno_t
6388 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
6389                 int *numdirent, vfs_context_t ctxp)
6390 {
6391         /* Check if fs natively supports VNODE_READDIR_EXTENDED */
6392         if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) {
6393                 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
6394         } else {
6395                 size_t bufsize;
6396                 void * bufptr;
6397                 uio_t auio;
6398                 struct direntry entry64;
6399                 struct dirent *dep;
                     char *bufend;
                     size_t dirent_hdrlen;
                     size_t remaining;
                     size_t reclen64;
6400                 int bytesread;
6401                 int error;
6402
6403                 /*
6404                  * Our kernel buffer needs to be smaller since re-packing
6405                  * will expand each dirent.  The worse case (when the name
6406                  * length is 3) corresponds to a struct direntry size of 32
6407                  * bytes (8-byte aligned) and a struct dirent size of 12 bytes
6408                  * (4-byte aligned).  So having a buffer that is 3/8 the size
6409                  * will prevent us from reading more than we can pack.
6410                  *
6411                  * Since this buffer is wired memory, we will limit the
6412                  * buffer size to a maximum of 32K. We would really like to
6413                  * use 32K in the MIN(), but we use magic number 87371 to
6414                  * prevent uio_resid() * 3 / 8 from overflowing.
6415                  */
6416                 bufsize = 3 * MIN(uio_resid(uio), 87371) / 8;
6417                 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
6418                 if (bufptr == NULL) {
6419                         return ENOMEM;
6420                 }
6421
6422                 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
                     /* do not dereference a uio that could not be allocated */
                     if (auio == NULL) {
                             FREE(bufptr, M_TEMP);
                             return ENOMEM;
                     }
6423                 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
6424                 auio->uio_offset = uio->uio_offset;
6425
6426                 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
6427
6428                 dep = (struct dirent *)bufptr;
6429                 bytesread = bufsize - uio_resid(auio);
                     bufend = (char *)bufptr + bytesread;
                     /* bytes in a struct dirent that precede the name */
                     dirent_hdrlen = sizeof(*dep) - sizeof(dep->d_name);
6430
6431                 /*
6432                  * Convert all the entries and copy them out to user's buffer.
6433                  */
6434                 while (error == 0 && (char *)dep < bufend) {
                             /*
                              * Sanity check the record before trusting it: the
                              * fixed header must be inside the data we read, the
                              * record must be long enough to hold its own name
                              * plus the terminating NUL, and it must not run past
                              * the end of the data.  A zero d_reclen would
                              * otherwise loop forever and an oversized one would
                              * walk off the buffer.
                              */
                             remaining = (size_t)(bufend - (char *)dep);
                             if (remaining < dirent_hdrlen ||
                                 dep->d_reclen < dirent_hdrlen + dep->d_namlen + 1 ||
                                 dep->d_reclen > remaining) {
                                     error = EIO;
                                     break;
                             }

                             /*
                              * entry64 lives on the stack and d_reclen bytes of
                              * it are copied out, including the alignment padding
                              * after the name; clear it so no stale kernel stack
                              * contents reach the caller.
                              */
                             reclen64 = DIRENT64_LEN(dep->d_namlen);
                             bzero(&entry64, reclen64);

6435                         /* Convert a dirent to a dirent64. */
6436                         entry64.d_ino = dep->d_ino;
6437                         entry64.d_seekoff = 0;
6438                         entry64.d_reclen = reclen64;
6439                         entry64.d_namlen = dep->d_namlen;
6440                         entry64.d_type = dep->d_type;
6441                         bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1);
6442
6443                         /* Move to next entry. */
6444                         dep = (struct dirent *)((char *)dep + dep->d_reclen);
6445
6446                         /* Copy entry64 to user's buffer. */
6447                         error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio);
6448                 }
6449
6450                 /* Update the real offset using the offset we got from VNOP_READDIR. */
6451                 if (error == 0) {
6452                         uio->uio_offset = auio->uio_offset;
6453                 }
6454                 uio_free(auio);
6455                 FREE(bufptr, M_TEMP);
6456                 return (error);
6457         }
6458 }
6459
6460 /*
6461  * Read a block of directory entries in a file system independent format.
      *
      * Common code for getdirentries() and getdirentries64().
      *
      * Parameters:    fd                      Descriptor of the directory to read
      *                bufp                    User buffer receiving the entries
      *                bufsize                 Size of that buffer in bytes
      *                bytesread               Out: bufsize minus the residual count
      *                                        left in the uio (written on success
      *                                        only)
      *                offset                  Out (may be NULL): the file offset
      *                                        sampled just before the read
      *                flags                   VNODE_READDIR_EXTENDED selects the
      *                                        vnode_readdir64() path
      *
      * Returns:        0                      Success
      *                EBADF                   Descriptor not open for reading
      *                EINVAL                  Descriptor does not name a directory
      *      fp_getfvp:???
      *      vnode_getwithref:???
      *      VNOP_READDIR / vnode_readdir64:???
      *
      * The descriptor's fg_offset is updated from the uio after the read,
      * before the error check, so it is written even when the read fails.
6462  */
6463 static int
6464 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
6465                      off_t *offset, int flags)
6466 {
6467         vnode_t vp;
6468         struct vfs_context context = *vfs_context_current();    /* local copy */
6469         struct fileproc *fp;
6470         uio_t auio;
6471         int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6472         off_t loff;
6473         int error, eofflag, numdirent;
6474         char uio_buf[ UIO_SIZEOF(1) ];
6475
6476         error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
6477         if (error) {
6478                 return (error);
6479         }
             /* every exit below goes through 'out', which does the file_drop(fd) */
6480         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6481                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6482                 error = EBADF;
6483                 goto out;
6484         }
6485
6486 #if CONFIG_MACF
6487         error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
6488         if (error)
6489                 goto out;
6490 #endif
6491         if ( (error = vnode_getwithref(vp)) ) {
6492                 goto out;
6493         }
6494         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6495
             /* re-entered with a different vp when a union mount layer is switched */
6496 unionread:
6497         if (vp->v_type != VDIR) {
6498                 (void)vnode_put(vp);
6499                 error = EINVAL;
6500                 goto out;
6501         }
6502
6503 #if CONFIG_MACF
6504         error = mac_vnode_check_readdir(&context, vp);
6505         if (error != 0) {
6506                 (void)vnode_put(vp);
6507                 goto out;
6508         }
6509 #endif /* MAC */
6510
             /* the uio is built in the on-stack uio_buf, so there is nothing to free */
6511         loff = fp->f_fglob->fg_offset;
6512         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
6513         uio_addiov(auio, bufp, bufsize);
6514
6515         if (flags & VNODE_READDIR_EXTENDED) {
6516                 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
6517                 fp->f_fglob->fg_offset = uio_offset(auio);
6518         } else {
6519                 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
6520                 fp->f_fglob->fg_offset = uio_offset(auio);
6521         }
6522         if (error) {
6523                 (void)vnode_put(vp);
6524                 goto out;
6525         }
6526
             /* nothing was transferred: see whether another union layer should be read */
6527         if ((user_ssize_t)bufsize == uio_resid(auio)){
6528                 if (union_dircheckp) {
6529                         error = union_dircheckp(&vp, fp, &context);
6530                         if (error == -1)
6531                                 goto unionread;
                             /*
                              * NOTE(review): this exit does not vnode_put(vp),
                              * unlike the other error exits above -- confirm
                              * whether union_dircheckp drops the iocount itself
                              * when it fails.
                              */
6532                         if (error)
6533                                 goto out;
6534                 }
6535
6536                 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) {
                             /*
                              * Switch the descriptor over to the vnode covered by
                              * this union mount and read that one from offset 0.
                              * NOTE(review): the return value of
                              * vnode_getwithref() is not checked here.
                              */
6537                         struct vnode *tvp = vp;
6538                         vp = vp->v_mount->mnt_vnodecovered;
6539                         vnode_getwithref(vp);
6540                         vnode_ref(vp);
6541                         fp->f_fglob->fg_data = (caddr_t) vp;
6542                         fp->f_fglob->fg_offset = 0;
6543                         vnode_rele(tvp);
6544                         vnode_put(tvp);
6545                         goto unionread;
6546                 }
6547         }
6548
6549         vnode_put(vp);
             /* report the offset the read started from, not where it ended */
6550         if (offset) {
6551                 *offset = loff;
6552         }
6553
6554         *bytesread = bufsize - uio_resid(auio);
6555 out:
6556         file_drop(fd);
6557         return (error);
6558 }
6559
6560
     /*
      * getdirentries: Read directory entries in 'struct dirent' format.
      *
      * Parameters:    p                       Calling process; selects the width of
      *                                        the value stored through uap->basep
      *                uap                     User argument descriptor (see below)
      *                retval                  Number of bytes placed in uap->buf
      *
      * Indirect:      uap->fd                 Directory descriptor
      *                uap->buf, uap->count    Destination buffer and its size
      *                uap->basep              Receives the offset the read began at
      *
      * Returns:        0                      Success
      *                !0                      Error from getdirentries_common() or
      *                                        copyout()
      */
6561 int
6562 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
6563 {
6564         off_t start_off;
6565         ssize_t nread;
6566         int error;
6567
6568         AUDIT_ARG(fd, uap->fd);
6569         error = getdirentries_common(uap->fd, uap->buf, uap->count, &nread, &start_off, 0);
6570         if (error != 0)
6571                 return error;
6572
             /* basep is a user 'long': 8 bytes for a 64-bit caller, 4 otherwise */
6573         if (proc_is64bit(p)) {
6574                 user64_long_t base64 = (user64_long_t)start_off;
6575                 error = copyout((caddr_t)&base64, uap->basep, sizeof(user64_long_t));
6576         } else {
6577                 user32_long_t base32 = (user32_long_t)start_off;
6578                 error = copyout((caddr_t)&base32, uap->basep, sizeof(user32_long_t));
6579         }
             /* the byte count is reported even when the copyout of basep failed */
6580         *retval = nread;
6581         return error;
6582 }
6583
     /*
      * getdirentries64: Read directory entries in 'struct direntry' format.
      *
      * Parameters:    p                       (unused)
      *                uap                     User argument descriptor (see below)
      *                retval                  Number of bytes placed in uap->buf
      *
      * Indirect:      uap->fd                 Directory descriptor
      *                uap->buf, uap->bufsize  Destination buffer and its size
      *                uap->position           Receives the offset the read began at
      *
      * Returns:        0                      Success
      *                !0                      Error from getdirentries_common() or
      *                                        copyout()
      */
6584 int
6585 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
6586 {
6587         off_t start_off;
6588         ssize_t nread;
6589         int error;
6590
6591         AUDIT_ARG(fd, uap->fd);
6592         error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &nread, &start_off, VNODE_READDIR_EXTENDED);
6593         if (error != 0)
6594                 return error;
6595
             /* hand back the byte count first, then the starting offset */
6596         *retval = nread;
6597         return copyout((caddr_t)&start_off, uap->position, sizeof(off_t));
6599 }
6600
6601
6602 /*
6603  * Install a new file creation mask for the process and hand back the
      * previous one.  The swap is done with the process fd lock held.
      *
      * Parameters:    p                       Process whose mask is changed
      *                newmask                 New mask; only the ALLPERMS bits are kept
      *                fsec                    (unused)
      *                retval                  Receives the previous mask
      *
      * Returns:        0                      Always
      *
6604  * XXX implement xsecurity
6605  */
6606 #define UMASK_NOXSECURITY        (void *)1      /* leave existing xsecurity alone */
6607 static int
6608 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
6609 {
6610         struct filedesc *fd_table;
6611
6612         AUDIT_ARG(mask, newmask);
6613
6614         proc_fdlock(p);
6615         fd_table = p->p_fd;
6616         *retval = fd_table->fd_cmask;
6617         fd_table->fd_cmask = newmask & ALLPERMS;
6618         proc_fdunlock(p);
6619
6620         return 0;
6619 }
6620
6621 /*
6622  * umask_extended: Set the file creation mask of the process; accepts a
      *                 filesec argument as well.
6623  *
6624  * Parameters:    p                       Process requesting to set the umask
6625  *                uap                     User argument descriptor (see below)
6626  *                retval                  Previous umask of the process
6627  *
6628  * Indirect:      uap->newmask            umask to set
6629  *                uap->xsecurity          Filesec to copy in, or USER_ADDR_NULL;
      *                                        it is passed to umask1() and freed
      *                                        again afterwards
6630  *
6631  * Returns:        0                      Success
6632  *                !0                      Error from kauth_copyinfilesec()
6633  *
6634  */
6635 int
6636 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
6637 {
6638         kauth_filesec_t fsec = KAUTH_FILESEC_NONE;
6639         int error;
6640
             /* bring the caller's filesec into the kernel, when there is one */
6641         if (uap->xsecurity != USER_ADDR_NULL) {
6642                 error = kauth_copyinfilesec(uap->xsecurity, &fsec);
6643                 if (error != 0) {
6644                         return error;
                     }
6645         }
6646
6647         error = umask1(p, uap->newmask, fsec, retval);
6648
6649         if (fsec != KAUTH_FILESEC_NONE) {
6650                 kauth_filesec_free(fsec);
             }
6653         return error;
6654 }
6655
     /*
      * umask: Set the file creation mask of the process.
      *
      * Parameters:    p                       Process requesting to set the umask
      *                uap                     User argument descriptor (see below)
      *                retval                  Previous umask of the process
      *
      * Indirect:      uap->newmask            umask to set
      *
      * Returns:        0                      Success
      */
6656 int
6657 umask(proc_t p, struct umask_args *uap, int32_t *retval)
6658 {
             /* no filesec comes with the plain call */
6659         return umask1(p, uap->newmask, UMASK_NOXSECURITY, retval);
6660 }
6661
6662 /*
6663  * Void all references to file by ripping underlying filesystem
6664  * away from vnode.
      *
      * Parameters:    p                       Calling process (its p_acflag is
      *                                        handed to suser())
      *                uap                     User argument descriptor (see below)
      *                retval                  (ignored)
      *
      * Indirect:      uap->path               Path of the device node to revoke
      *
      * Returns:        0                      Success
      *                ENOTSUP                 Not a character or block device
      *                EBUSY                   Block device that is mounted on
      *      namei:???
      *      mac_vnode_check_revoke:???        (CONFIG_MACF only)
      *      vnode_getattr:???
      *      suser:???
6665  */
6666 /* ARGSUSED */
6667 int
6668 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
6669 {
6670         vnode_t vp;
6671         struct vnode_attr va;
6672         vfs_context_t ctx = vfs_context_current();
6673         int error;
6674         struct nameidata nd;
6675
6676         NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
6677                 UIO_USERSPACE, uap->path, ctx);
6678         error = namei(&nd);
6679         if (error)
6680                 return (error);
6681         vp = nd.ni_vp;
6682
             /* done with the lookup state; vp itself is released at 'out' */
6683         nameidone(&nd);
6684
             /* only device nodes can be revoked */
6685         if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
6686                 error = ENOTSUP;
6687                 goto out;
6688         }
6689
6690         if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
6691                 error = EBUSY;
6692                 goto out;
6693         }
6694
6695 #if CONFIG_MACF
6696         error = mac_vnode_check_revoke(ctx, vp);
6697         if (error)
6698                 goto out;
6699 #endif
6700
             /* the caller must own the node, or else pass the suser() check */
6701         VATTR_INIT(&va);
6702         VATTR_WANTED(&va, va_uid);
6703         if ((error = vnode_getattr(vp, &va, ctx)))
6704                 goto out;
6705         if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
6706             (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
6707                 goto out;
             /* the result of VNOP_REVOKE is not examined; error is still 0 here */
6708         if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
6709                 VNOP_REVOKE(vp, REVOKEALL, ctx);
6710 out:
6711         vnode_put(vp);
6712         return (error);
6713 }
6714
6715
6716 /*
6717  *  HFS/HFS PLUS SPECIFIC SYSTEM CALLS
6718  *  The following system calls are designed to support features
6719  *  which are specific to the HFS & HFS Plus volume formats
6720  */
6721
6722 #ifdef __APPLE_API_OBSOLETE
6723
6724 /************************************************/
6725 /* *** Following calls will be deleted soon *** */
6726 /************************************************/
6727
6728 /*
6729  * Make a complex file, i.e. one with multiple forks (data streams).
      *
      * Obsolete: the arguments are not examined and the call always fails.
      *
      * Returns:        ENOTSUP                 Always
6730  */
6731 /* ARGSUSED */
6732 int
6733 mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused int32_t *retval)
6734 {
6735         return ENOTSUP;
6736 }
6737
6738 /*
6739  * Extended stat call, meant to return volumeid and vnodeid as well as
      * other info.
      *
      * Obsolete: the arguments are not examined and the call always fails.
      *
      * Returns:        ENOTSUP                 Always
6740  */
6741 /* ARGSUSED */
6742 int
6743 statv(__unused proc_t p, __unused struct statv_args *uap, __unused int32_t *retval)
6746 {
             /* not implemented: just report an error for now */
6747         return ENOTSUP;
6749 }
6750
6751 /*
6752  * Extended lstat call, meant to return volumeid and vnodeid as well as
      * other info.
      *
      * Obsolete: the arguments are not examined and the call always fails.
      *
      * Returns:        ENOTSUP                 Always
6753  */
6754 /* ARGSUSED */
6755 int
6756 lstatv(__unused proc_t p, __unused struct lstatv_args *uap, __unused int32_t *retval)
6759 {
             /* not implemented: just report an error for now */
6760         return ENOTSUP;
6761 }
6762
6763 /*
6764  * Extended fstat call, meant to return volumeid and vnodeid as well as
      * other info.
      *
      * Obsolete: the arguments are not examined and the call always fails.
      *
      * Returns:        ENOTSUP                 Always
6765  */
6766 /* ARGSUSED */
6767 int
6768 fstatv(__unused proc_t p, __unused struct fstatv_args *uap, __unused int32_t *retval)
6771 {
             /* not implemented: just report an error for now */
6772         return ENOTSUP;
6773 }
6774
6775
6776 /************************************************/
6777 /* *** Preceding calls will be deleted soon *** */
6778 /************************************************/
6779
6780 #endif /* __APPLE_API_OBSOLETE */
6781
6782 /*
6783 * Obtain attribute information on objects in a directory while enumerating
6784 * the directory.  This call does not yet support union mounted directories.
6785 * TO DO
6786 *  1.union mounted directories.
6787 */
6788
6789 /* ARGSUSED */
6790 int
6791 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
6792 {
6793         vnode_t vp;
6794         struct fileproc *fp;
6795         uio_t auio = NULL;
6796         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6797         uint32_t count;
6798         uint32_t newstate;
6799         int error, eofflag;
6800         uint32_t loff;
6801         struct attrlist attributelist;
6802         vfs_context_t ctx = vfs_context_current();
6803         int fd = uap->fd;
6804         char uio_buf[ UIO_SIZEOF(1) ];
6805         kauth_action_t action;
6806
6807         AUDIT_ARG(fd, fd);
6808
6809         /* Get the attributes into kernel space */
6810         if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
6811                 return(error);
6812         }
6813         if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
6814                 return(error);
6815         }
6816         if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
6817                 return (error);
6818         }
6819         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6820                 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6821                 error = EBADF;
6822                 goto out;
6823         }
6824
6825
6826 #if CONFIG_MACF
6827         error = mac_file_check_change_offset(vfs_context_ucred(ctx),
6828             fp->f_fglob);
6829         if (error)
6830                 goto out;
6831 #endif
6832
6833
6834         if ( (error = vnode_getwithref(vp)) )
6835                 goto out;
6836
6837         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6838
6839         if (vp->v_type != VDIR) {
6840                 (void)vnode_put(vp);
6841                 error = EINVAL;
6842                 goto out;
6843         }
6844
6845 #if CONFIG_MACF
6846         error = mac_vnode_check_readdir(ctx, vp);
6847         if (error != 0) {
6848                 (void)vnode_put(vp);
6849                 goto out;
6850         }
6851 #endif /* MAC */
6852
6853         /* set up the uio structure which will contain the users return buffer */
6854         loff = fp->f_fglob->fg_offset;
6855         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ,
6856             &uio_buf[0], sizeof(uio_buf));
6857         uio_addiov(auio, uap->buffer, uap->buffersize);
6858
6859         /*
6860          * If the only item requested is file names, we can let that past with
6861          * just LIST_DIRECTORY.  If they want any other attributes, that means
6862          * they need SEARCH as well.
6863          */
6864         action = KAUTH_VNODE_LIST_DIRECTORY;
6865         if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
6866             attributelist.fileattr || attributelist.dirattr)
6867                 action |= KAUTH_VNODE_SEARCH;
6868
6869         if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
6870
6871                 /* Believe it or not, uap->options only has 32-bits of valid
6872                  * info, so truncate before extending again */
6873                 error = VNOP_READDIRATTR(vp, &attributelist, auio,
6874                                          count,
6875                                          (u_long)(uint32_t)uap->options, &newstate, &eofflag,
6876                                          &count, ctx);
6877         }
6878         (void)vnode_put(vp);
6879
6880         if (error)
6881                 goto out;
6882         fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
6883
6884         if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
6885                 goto out;
6886         if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
6887                 goto out;
6888         if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
6889                 goto out;
6890
6891         *retval = eofflag;  /* similar to getdirentries */
6892         error = 0;
6893 out:
6894         file_drop(fd);
6895         return (error); /* return error earlier, an retval of 0 or 1 now */
6896
6897 } /* end of getdirentryattr system call */
6898
6899 /*
6900 * Exchange data between two files
6901 */
6902
6903 /* ARGSUSED */
6904 int
6905 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
6906 {
6907
6908         struct nameidata fnd, snd;
6909         vfs_context_t ctx = vfs_context_current();
6910         vnode_t fvp;
6911         vnode_t svp;
6912         int error;
6913         u_int32_t nameiflags;
6914         char *fpath = NULL;
6915         char *spath = NULL;
6916         int   flen=0, slen=0;
6917         int from_truncated=0, to_truncated=0;
6918 #if CONFIG_FSE
6919         fse_info f_finfo, s_finfo;
6920 #endif
6921
6922         nameiflags = 0;
6923         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6924
6925     NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1,
6926                 UIO_USERSPACE, uap->path1, ctx);
6927
6928     error = namei(&fnd);
6929     if (error)
6930         goto out2;
6931
6932         nameidone(&fnd);
6933         fvp = fnd.ni_vp;
6934
6935     NDINIT(&snd, LOOKUP | CN_NBMOUNTLOOK, nameiflags | AUDITVNPATH2,
6936                 UIO_USERSPACE, uap->path2, ctx);
6937
6938     error = namei(&snd);
6939     if (error) {
6940                 vnode_put(fvp);
6941                 goto out2;
6942     }
6943         nameidone(&snd);
6944         svp = snd.ni_vp;
6945
6946         /*
6947          * if the files are the same, return an inval error
6948          */
6949         if (svp == fvp) {
6950                 error = EINVAL;
6951                 goto out;
6952         }
6953
6954         /*
6955          * if the files are on different volumes, return an error
6956          */
6957         if (svp->v_mount != fvp->v_mount) {
6958                 error = EXDEV;
6959                 goto out;
6960         }
6961
6962 #if CONFIG_MACF
6963         error = mac_vnode_check_exchangedata(ctx,
6964             fvp, svp);
6965         if (error)
6966                 goto out;
6967 #endif
6968         if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
6969             ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
6970                 goto out;
6971
6972         if (
6973 #if CONFIG_FSE
6974         need_fsevent(FSE_EXCHANGE, fvp) ||
6975 #endif
6976         kauth_authorize_fileop_has_listeners()) {
6977                 GET_PATH(fpath);
6978                 GET_PATH(spath);
6979                 if (fpath == NULL || spath == NULL) {
6980                         error = ENOMEM;
6981                         goto out;
6982                 }
6983
6984                 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
6985                 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
6986
6987 #if CONFIG_FSE
6988                 get_fse_info(fvp, &f_finfo, ctx);
6989                 get_fse_info(svp, &s_finfo, ctx);
6990                 if (from_truncated || to_truncated) {
6991                         // set it here since only the f_finfo gets reported up to user space
6992                         f_finfo.mode |= FSE_TRUNCATED_PATH;
6993                 }
6994 #endif
6995         }
6996         /* Ok, make the call */
6997         error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
6998
6999         if (error == 0) {
7000             const char *tmpname;
7001
7002             if (fpath != NULL && spath != NULL) {
7003                     /* call out to allow 3rd party notification of exchangedata.
7004                      * Ignore result of kauth_authorize_fileop call.
7005                      */
7006                     kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
7007                                            (uintptr_t)fpath, (uintptr_t)spath);
7008             }
7009             name_cache_lock();
7010
7011             tmpname     = fvp->v_name;
7012             fvp->v_name = svp->v_name;
7013             svp->v_name = tmpname;
7014
7015             if (fvp->v_parent != svp->v_parent) {
7016                 vnode_t tmp;
7017
7018                 tmp           = fvp->v_parent;
7019                 fvp->v_parent = svp->v_parent;
7020                 svp->v_parent = tmp;
7021             }
7022             name_cache_unlock();
7023
7024 #if CONFIG_FSE
7025             if (fpath != NULL && spath != NULL) {
7026                     add_fsevent(FSE_EXCHANGE, ctx,
7027                                 FSE_ARG_STRING, flen, fpath,
7028                                 FSE_ARG_FINFO, &f_finfo,
7029                                 FSE_ARG_STRING, slen, spath,
7030                                 FSE_ARG_FINFO, &s_finfo,
7031                                 FSE_ARG_DONE);
7032             }
7033 #endif
7034         }
7035
7036 out:
7037         if (fpath != NULL)
7038                 RELEASE_PATH(fpath);
7039         if (spath != NULL)
7040                 RELEASE_PATH(spath);
7041         vnode_put(svp);
7042         vnode_put(fvp);
7043 out2:
7044         return (error);
7045 }
7046
7047
7048 /* ARGSUSED */
7049
7050 int
7051 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
7052 {
7053         vnode_t vp;
7054         int error=0;
7055         int fserror = 0;
7056         struct nameidata nd;
7057         struct user64_fssearchblock searchblock;
7058         struct searchstate *state;
7059         struct attrlist *returnattrs;
7060         struct timeval timelimit;
7061         void *searchparams1,*searchparams2;
7062         uio_t auio = NULL;
7063         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7064         uint32_t nummatches;
7065         int mallocsize;
7066         uint32_t nameiflags;
7067         vfs_context_t ctx = vfs_context_current();
7068         char uio_buf[ UIO_SIZEOF(1) ];
7069
7070         /* Start by copying in fsearchblock paramater list */
7071     if (IS_64BIT_PROCESS(p)) {
7072         error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
7073         timelimit.tv_sec = searchblock.timelimit.tv_sec;
7074         timelimit.tv_usec = searchblock.timelimit.tv_usec;
7075     }
7076     else {
7077         struct user32_fssearchblock tmp_searchblock;
7078
7079         error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
7080         // munge into 64-bit version
7081         searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
7082         searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
7083         searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
7084         searchblock.maxmatches = tmp_searchblock.maxmatches;
7085                 /*
7086                  * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
7087                  * from a 32 bit long, and tv_usec is already a signed 32 bit int.
7088                  */
7089         timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
7090         timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
7091         searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
7092         searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
7093         searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
7094         searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
7095         searchblock.searchattrs = tmp_searchblock.searchattrs;
7096     }
7097         if (error)
7098                 return(error);
7099
7100         /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
7101          */
7102         if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
7103                 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
7104                 return(EINVAL);
7105
7106         /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
7107         /* It all has to do into local memory and it's not that big so we might as well  put it all together. */
7108         /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
7109         /* block.                                                                                             */
7110
7111         mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
7112                       sizeof(struct attrlist) + sizeof(struct searchstate);
7113
7114         MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
7115
7116         /* Now set up the various pointers to the correct place in our newly allocated memory */
7117
7118         searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
7119         returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
7120         state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
7121
7122         /* Now copy in the stuff given our local variables. */
7123
7124         if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
7125                 goto freeandexit;
7126
7127         if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
7128                 goto freeandexit;
7129
7130         if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
7131                 goto freeandexit;
7132
7133         if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
7134                 goto freeandexit;
7135
7136
7137         /*
7138          * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
7139          * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
7140          * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
7141          * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
7142          * validate the user-supplied data offset of the attrreference_t, we'll do it here.
7143          */
7144
7145         if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
7146                 attrreference_t* string_ref;
7147                 u_int32_t* start_length;
7148                 user64_size_t param_length;
7149
7150                 /* validate searchparams1 */
7151                 param_length = searchblock.sizeofsearchparams1;
7152                 /* skip the word that specifies length of the buffer */
7153                 start_length= (u_int32_t*) searchparams1;
7154                 start_length= start_length+1;
7155                 string_ref= (attrreference_t*) start_length;
7156
7157                 /* ensure no negative offsets or too big offsets */
7158                 if (string_ref->attr_dataoffset < 0 ) {
7159                         error = EINVAL;
7160                         goto freeandexit;
7161                 }
7162                 if (string_ref->attr_length > MAXPATHLEN) {
7163                         error = EINVAL;
7164                         goto freeandexit;
7165                 }
7166
7167                 /* Check for pointer overflow in the string ref */
7168                 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
7169                         error = EINVAL;
7170                         goto freeandexit;
7171                 }
7172
7173                 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
7174                         error = EINVAL;
7175                         goto freeandexit;
7176                 }
7177                 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
7178                         error = EINVAL;
7179                         goto freeandexit;
7180                 }
7181         }
7182
7183         /* set up the uio structure which will contain the users return buffer */
7184         auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
7185                                                                   &uio_buf[0], sizeof(uio_buf));
7186     uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
7187
7188         nameiflags = 0;
7189         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7190         NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1,
7191                 UIO_USERSPACE, uap->path, ctx);
7192
7193         error = namei(&nd);
7194         if (error)
7195                 goto freeandexit;
7196
7197         nameidone(&nd);
7198         vp = nd.ni_vp;
7199
7200
7201         /*
7202          * If searchblock.maxmatches == 0, then skip the search. This has happened
7203          * before and sometimes the underlyning code doesnt deal with it well.
7204          */
7205          if (searchblock.maxmatches == 0) {
7206                 nummatches = 0;
7207                 goto saveandexit;
7208          }
7209
7210         /*
7211            Allright, we have everything we need, so lets make that call.
7212
7213            We keep special track of the return value from the file system:
7214            EAGAIN is an acceptable error condition that shouldn't keep us
7215            from copying out any results...
7216          */
7217
7218         fserror = VNOP_SEARCHFS(vp,
7219                                                         searchparams1,
7220                                                         searchparams2,
7221                                                         &searchblock.searchattrs,
7222                                                         (u_long)searchblock.maxmatches,
7223                                                         &timelimit,
7224                                                         returnattrs,
7225                                                         &nummatches,
7226                                                         (u_long)uap->scriptcode,
7227                                                         (u_long)uap->options,
7228                                                         auio,
7229                                                         state,
7230                                                         ctx);
7231
7232 saveandexit:
7233
7234         vnode_put(vp);
7235
7236         /* Now copy out the stuff that needs copying out. That means the number of matches, the
7237            search state.  Everything was already put into he return buffer by the vop call. */
7238
7239         if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
7240                 goto freeandexit;
7241
7242     if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
7243                 goto freeandexit;
7244
7245         error = fserror;
7246
7247 freeandexit:
7248
7249         FREE(searchparams1,M_TEMP);
7250
7251         return(error);
7252
7253
7254 } /* end of searchfs system call */
7255
7256
7257 /*
7258  * Make a filesystem-specific control call:
7259  */
7260 /* ARGSUSED */
7261 static int
7262 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
7263 {
7264         int error=0;
7265         boolean_t is64bit;
7266         u_int size;
7267 #define STK_PARAMS 128
7268         char stkbuf[STK_PARAMS];
7269         caddr_t data, memp;
7270         vnode_t vp = *arg_vp;
7271
7272         size = IOCPARM_LEN(cmd);
7273         if (size > IOCPARM_MAX) return (EINVAL);
7274
7275     is64bit = proc_is64bit(p);
7276
7277         memp = NULL;
7278         if (size > sizeof (stkbuf)) {
7279                 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
7280                 data = memp;
7281         } else {
7282                 data = &stkbuf[0];
7283         };
7284
7285         if (cmd & IOC_IN) {
7286                 if (size) {
7287                         error = copyin(udata, data, size);
7288                         if (error) goto FSCtl_Exit;
7289                 } else {
7290                     if (is64bit) {
7291                         *(user_addr_t *)data = udata;
7292                     }
7293                     else {
7294                         *(uint32_t *)data = (uint32_t)udata;
7295                     }
7296                 };
7297         } else if ((cmd & IOC_OUT) && size) {
7298                 /*
7299                  * Zero the buffer so the user always
7300                  * gets back something deterministic.
7301                  */
7302                 bzero(data, size);
7303         } else if (cmd & IOC_VOID) {
7304                 if (is64bit) {
7305                     *(user_addr_t *)data = udata;
7306                 }
7307                 else {
7308                     *(uint32_t *)data = (uint32_t)udata;
7309                 }
7310         }
7311
7312         /* Check to see if it's a generic command */
7313         if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) {
7314                 mount_t mp = vp->v_mount;
7315                 int arg = *(uint32_t*)data;
7316
7317                 /* record vid of vp so we can drop it below. */
7318                 uint32_t vvid = vp->v_id;
7319
7320                 /*
7321                  * Then grab mount_iterref so that we can release the vnode.
7322                  * Without this, a thread may call vnode_iterate_prepare then
7323                  * get into a deadlock because we've never released the root vp
7324                  */
7325                 error = mount_iterref (mp, 0);
7326                 if (error)  {
7327                         goto FSCtl_Exit;
7328                 }
7329                 vnode_put(vp);
7330
7331                 /* issue the sync for this volume */
7332                 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
7333
7334                 /*
7335                  * Then release the mount_iterref once we're done syncing; it's not
7336                  * needed for the VNOP_IOCTL below
7337                  */
7338                 mount_iterdrop(mp);
7339
7340                 if (arg & FSCTL_SYNC_FULLSYNC) {
7341                         /* re-obtain vnode iocount on the root vp, if possible */
7342                         error = vnode_getwithvid (vp, vvid);
7343                         if (error == 0) {
7344                                 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
7345                                 vnode_put (vp);
7346                         }
7347                 }
7348                 /* mark the argument VP as having been released */
7349                 *arg_vp = NULL;
7350
7351         } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
7352             user_addr_t ext_strings;
7353             uint32_t    num_entries;
7354             uint32_t    max_width;
7355
7356             if (   (is64bit && size != sizeof(user64_package_ext_info))
7357                 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
7358
7359                 // either you're 64-bit and passed a 64-bit struct or
7360                 // you're 32-bit and passed a 32-bit struct.  otherwise
7361                 // it's not ok.
7362                 error = EINVAL;
7363                 goto FSCtl_Exit;
7364             }
7365
7366             if (is64bit) {
7367                 ext_strings = ((user64_package_ext_info *)data)->strings;
7368                 num_entries = ((user64_package_ext_info *)data)->num_entries;
7369                 max_width   = ((user64_package_ext_info *)data)->max_width;
7370             } else {
7371                 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
7372                 num_entries = ((user32_package_ext_info *)data)->num_entries;
7373                 max_width   = ((user32_package_ext_info *)data)->max_width;
7374             }
7375
7376             error = set_package_extensions_table(ext_strings, num_entries, max_width);
7377
7378         } else if (IOCBASECMD(cmd) == FSCTL_WAIT_FOR_SYNC) {
7379                 error = tsleep((caddr_t)&sync_wait_time, PVFS|PCATCH, "sync-wait", 0);
7380                 if (error == 0) {
7381                         *(uint32_t *)data = (uint32_t)sync_wait_time;
7382                         error = 0;
7383                 } else {
7384                         error *= -1;
7385                 }
7386
7387         } else {
7388                 /* Invoke the filesystem-specific code */
7389                 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
7390         }
7391
7392
7393         /*
7394          * Copy any data to user, size was
7395          * already set and checked above.
7396          */
7397         if (error == 0 && (cmd & IOC_OUT) && size)
7398                 error = copyout(data, udata, size);
7399
7400 FSCtl_Exit:
7401         if (memp) kfree(memp, size);
7402
7403         return error;
7404 }
7405
7406 /* ARGSUSED */
7407 int
7408 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
7409 {
7410         int error;
7411         struct nameidata nd;
7412         u_long nameiflags;
7413         vnode_t vp = NULL;
7414         vfs_context_t ctx = vfs_context_current();
7415
7416         AUDIT_ARG(cmd, uap->cmd);
7417         AUDIT_ARG(value32, uap->options);
7418         /* Get the vnode for the file we are getting info on:  */
7419         nameiflags = 0;
7420         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7421         NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE,
7422             uap->path, ctx);
7423         if ((error = namei(&nd))) goto done;
7424         vp = nd.ni_vp;
7425         nameidone(&nd);
7426
7427 #if CONFIG_MACF
7428         error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
7429         if (error) {
7430                 goto done;
7431         }
7432 #endif
7433
7434         error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
7435
7436 done:
7437         if (vp)
7438                 vnode_put(vp);
7439         return error;
7440 }
7441 /* ARGSUSED */
7442 int
7443 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
7444 {
7445         int error;
7446         vnode_t vp = NULL;
7447         vfs_context_t ctx = vfs_context_current();
7448         int fd = -1;
7449
7450         AUDIT_ARG(fd, uap->fd);
7451         AUDIT_ARG(cmd, uap->cmd);
7452         AUDIT_ARG(value32, uap->options);
7453
7454         /* Get the vnode for the file we are getting info on:  */
7455         if ((error = file_vnode(uap->fd, &vp)))
7456                 goto done;
7457         fd = uap->fd;
7458         if ((error = vnode_getwithref(vp))) {
7459                 goto done;
7460         }
7461
7462 #if CONFIG_MACF
7463         error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
7464         if (error) {
7465                 goto done;
7466         }
7467 #endif
7468
7469         error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
7470
7471 done:
7472         if (fd != -1)
7473                 file_drop(fd);
7474
7475         if (vp)
7476                 vnode_put(vp);
7477         return error;
7478 }
7479 /* end of fsctl system call */
7480
7481 /*
7482  * An in-kernel sync for power management to call.
7483  */
7484 __private_extern__ int
7485 sync_internal(void)
7486 {
7487         int error;
7488
7489         struct sync_args data;
7490
7491         int retval[2];
7492
7493
7494         error = sync(current_proc(), &data, &retval[0]);
7495
7496
7497         return (error);
7498 } /* end of sync_internal call */
7499
7500
7501 /*
7502  *  Retrieve the data of an extended attribute.
7503  */
7504 int
7505 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
7506 {
7507         vnode_t vp;
7508         struct nameidata nd;
7509         char attrname[XATTR_MAXNAMELEN+1];
7510         vfs_context_t ctx = vfs_context_current();
7511         uio_t auio = NULL;
7512         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7513         size_t attrsize = 0;
7514         size_t namelen;
7515         u_int32_t nameiflags;
7516         int error;
7517         char uio_buf[ UIO_SIZEOF(1) ];
7518
7519         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7520                 return (EINVAL);
7521
7522         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7523         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7524         if ((error = namei(&nd))) {
7525                 return (error);
7526         }
7527         vp = nd.ni_vp;
7528         nameidone(&nd);
7529
7530         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7531                 goto out;
7532         }
7533         if (xattr_protected(attrname)) {
7534                 error = EPERM;
7535                 goto out;
7536         }
7537         /*
7538          * the specific check for 0xffffffff is a hack to preserve
7539          * binaray compatibilty in K64 with applications that discovered
7540          * that passing in a buf pointer and a size of -1 resulted in
7541          * just the size of the indicated extended attribute being returned.
7542          * this isn't part of the documented behavior, but because of the
7543          * original implemtation's check for "uap->size > 0", this behavior
7544          * was allowed. In K32 that check turned into a signed comparison
7545          * even though uap->size is unsigned...  in K64, we blow by that
7546          * check because uap->size is unsigned and doesn't get sign smeared
7547          * in the munger for a 32 bit user app.  we also need to add a
7548          * check to limit the maximum size of the buffer being passed in...
7549          * unfortunately, the underlying fileystems seem to just malloc
7550          * the requested size even if the actual extended attribute is tiny.
7551          * because that malloc is for kernel wired memory, we have to put a
7552          * sane limit on it.
7553          *
7554          * U32 running on K64 will yield 0x00000000ffffffff for uap->size
7555          * U64 running on K64 will yield -1 (64 bits wide)
7556          * U32/U64 running on K32 will yield -1 (32 bits wide)
7557          */
7558         if (uap->size == 0xffffffff || uap->size == (size_t)-1)
7559                 goto no_uio;
7560
7561         if (uap->size > (size_t)XATTR_MAXSIZE)
7562                 uap->size = XATTR_MAXSIZE;
7563
7564         if (uap->value) {
7565                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
7566                                             &uio_buf[0], sizeof(uio_buf));
7567                 uio_addiov(auio, uap->value, uap->size);
7568         }
7569 no_uio:
7570         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
7571 out:
7572         vnode_put(vp);
7573
7574         if (auio) {
7575                 *retval = uap->size - uio_resid(auio);
7576         } else {
7577                 *retval = (user_ssize_t)attrsize;
7578         }
7579
7580         return (error);
7581 }
7582
7583 /*
7584  * Retrieve the data of an extended attribute.
7585  */
7586 int
7587 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
7588 {
7589         vnode_t vp;
7590         char attrname[XATTR_MAXNAMELEN+1];
7591         uio_t auio = NULL;
7592         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7593         size_t attrsize = 0;
7594         size_t namelen;
7595         int error;
7596         char uio_buf[ UIO_SIZEOF(1) ];
7597
7598         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7599                 return (EINVAL);
7600
7601         if ( (error = file_vnode(uap->fd, &vp)) ) {
7602                 return (error);
7603         }
7604         if ( (error = vnode_getwithref(vp)) ) {
7605                 file_drop(uap->fd);
7606                 return(error);
7607         }
7608         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7609                 goto out;
7610         }
7611         if (xattr_protected(attrname)) {
7612                 error = EPERM;
7613                 goto out;
7614         }
7615         if (uap->value && uap->size > 0) {
7616                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
7617                                             &uio_buf[0], sizeof(uio_buf));
7618                 uio_addiov(auio, uap->value, uap->size);
7619         }
7620
7621         error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
7622 out:
7623         (void)vnode_put(vp);
7624         file_drop(uap->fd);
7625
7626         if (auio) {
7627                 *retval = uap->size - uio_resid(auio);
7628         } else {
7629                 *retval = (user_ssize_t)attrsize;
7630         }
7631         return (error);
7632 }
7633
7634 /*
7635  * Set the data of an extended attribute.
7636  */
7637 int
7638 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
7639 {
7640         vnode_t vp;
7641         struct nameidata nd;
7642         char attrname[XATTR_MAXNAMELEN+1];
7643         vfs_context_t ctx = vfs_context_current();
7644         uio_t auio = NULL;
7645         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7646         size_t namelen;
7647         u_int32_t nameiflags;
7648         int error;
7649         char uio_buf[ UIO_SIZEOF(1) ];
7650
7651         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7652                 return (EINVAL);
7653
7654         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7655                 return (error);
7656         }
7657         if (xattr_protected(attrname))
7658                 return(EPERM);
7659         if (uap->size != 0 && uap->value == 0) {
7660                 return (EINVAL);
7661         }
7662
7663         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7664         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7665         if ((error = namei(&nd))) {
7666                 return (error);
7667         }
7668         vp = nd.ni_vp;
7669         nameidone(&nd);
7670
7671         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
7672                                     &uio_buf[0], sizeof(uio_buf));
7673         uio_addiov(auio, uap->value, uap->size);
7674
7675         error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
7676 #if CONFIG_FSE
7677         if (error == 0) {
7678                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
7679                     FSE_ARG_VNODE, vp,
7680                     FSE_ARG_DONE);
7681         }
7682 #endif
7683         vnode_put(vp);
7684         *retval = 0;
7685         return (error);
7686 }
7687
7688 /*
7689  * Set the data of an extended attribute.
7690  */
7691 int
7692 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
7693 {
7694         vnode_t vp;
7695         char attrname[XATTR_MAXNAMELEN+1];
7696         uio_t auio = NULL;
7697         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7698         size_t namelen;
7699         int error;
7700         char uio_buf[ UIO_SIZEOF(1) ];
7701         vfs_context_t ctx = vfs_context_current();
7702
7703         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7704                 return (EINVAL);
7705
7706         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7707                 return (error);
7708         }
7709         if (xattr_protected(attrname))
7710                 return(EPERM);
7711         if (uap->size != 0 && uap->value == 0) {
7712                 return (EINVAL);
7713         }
7714         if ( (error = file_vnode(uap->fd, &vp)) ) {
7715                 return (error);
7716         }
7717         if ( (error = vnode_getwithref(vp)) ) {
7718                 file_drop(uap->fd);
7719                 return(error);
7720         }
7721         auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
7722                                     &uio_buf[0], sizeof(uio_buf));
7723         uio_addiov(auio, uap->value, uap->size);
7724
7725         error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
7726 #if CONFIG_FSE
7727         if (error == 0) {
7728                 add_fsevent(FSE_XATTR_MODIFIED, ctx,
7729                     FSE_ARG_VNODE, vp,
7730                     FSE_ARG_DONE);
7731         }
7732 #endif
7733         vnode_put(vp);
7734         file_drop(uap->fd);
7735         *retval = 0;
7736         return (error);
7737 }
7738
7739 /*
7740  * Remove an extended attribute.
7741  * XXX Code duplication here.
7742  */
7743 int
7744 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
7745 {
7746         vnode_t vp;
7747         struct nameidata nd;
7748         char attrname[XATTR_MAXNAMELEN+1];
7749         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7750         vfs_context_t ctx = vfs_context_current();
7751         size_t namelen;
7752         u_int32_t nameiflags;
7753         int error;
7754
7755         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7756                 return (EINVAL);
7757
7758         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
7759         if (error != 0) {
7760                 return (error);
7761         }
7762         if (xattr_protected(attrname))
7763                 return(EPERM);
7764         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7765         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7766         if ((error = namei(&nd))) {
7767                 return (error);
7768         }
7769         vp = nd.ni_vp;
7770         nameidone(&nd);
7771
7772         error = vn_removexattr(vp, attrname, uap->options, ctx);
7773 #if CONFIG_FSE
7774         if (error == 0) {
7775                 add_fsevent(FSE_XATTR_REMOVED, ctx,
7776                     FSE_ARG_VNODE, vp,
7777                     FSE_ARG_DONE);
7778         }
7779 #endif
7780         vnode_put(vp);
7781         *retval = 0;
7782         return (error);
7783 }
7784
7785 /*
7786  * Remove an extended attribute.
7787  * XXX Code duplication here.
7788  */
7789 int
7790 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
7791 {
7792         vnode_t vp;
7793         char attrname[XATTR_MAXNAMELEN+1];
7794         size_t namelen;
7795         int error;
7796         vfs_context_t ctx = vfs_context_current();
7797
7798         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7799                 return (EINVAL);
7800
7801         error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
7802         if (error != 0) {
7803                 return (error);
7804         }
7805         if (xattr_protected(attrname))
7806                 return(EPERM);
7807         if ( (error = file_vnode(uap->fd, &vp)) ) {
7808                 return (error);
7809         }
7810         if ( (error = vnode_getwithref(vp)) ) {
7811                 file_drop(uap->fd);
7812                 return(error);
7813         }
7814
7815         error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
7816 #if CONFIG_FSE
7817         if (error == 0) {
7818                 add_fsevent(FSE_XATTR_REMOVED, ctx,
7819                     FSE_ARG_VNODE, vp,
7820                     FSE_ARG_DONE);
7821         }
7822 #endif
7823         vnode_put(vp);
7824         file_drop(uap->fd);
7825         *retval = 0;
7826         return (error);
7827 }
7828
7829 /*
7830  * Retrieve the list of extended attribute names.
7831  * XXX Code duplication here.
7832  */
7833 int
7834 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
7835 {
7836         vnode_t vp;
7837         struct nameidata nd;
7838         vfs_context_t ctx = vfs_context_current();
7839         uio_t auio = NULL;
7840         int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7841         size_t attrsize = 0;
7842         u_int32_t nameiflags;
7843         int error;
7844         char uio_buf[ UIO_SIZEOF(1) ];
7845
7846         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7847                 return (EINVAL);
7848
7849         nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
7850         NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7851         if ((error = namei(&nd))) {
7852                 return (error);
7853         }
7854         vp = nd.ni_vp;
7855         nameidone(&nd);
7856         if (uap->namebuf != 0 && uap->bufsize > 0) {
7857                 auio = uio_createwithbuffer(1, 0, spacetype,
7858                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
7859                 uio_addiov(auio, uap->namebuf, uap->bufsize);
7860         }
7861
7862         error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
7863
7864         vnode_put(vp);
7865         if (auio) {
7866                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
7867         } else {
7868                 *retval = (user_ssize_t)attrsize;
7869         }
7870         return (error);
7871 }
7872
7873 /*
7874  * Retrieve the list of extended attribute names.
7875  * XXX Code duplication here.
7876  */
7877 int
7878 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
7879 {
7880         vnode_t vp;
7881         uio_t auio = NULL;
7882         int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7883         size_t attrsize = 0;
7884         int error;
7885         char uio_buf[ UIO_SIZEOF(1) ];
7886
7887         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7888                 return (EINVAL);
7889
7890         if ( (error = file_vnode(uap->fd, &vp)) ) {
7891                 return (error);
7892         }
7893         if ( (error = vnode_getwithref(vp)) ) {
7894                 file_drop(uap->fd);
7895                 return(error);
7896         }
7897         if (uap->namebuf != 0 && uap->bufsize > 0) {
7898                 auio = uio_createwithbuffer(1, 0, spacetype,
7899                                                                           UIO_READ, &uio_buf[0], sizeof(uio_buf));
7900                 uio_addiov(auio, uap->namebuf, uap->bufsize);
7901         }
7902
7903         error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
7904
7905         vnode_put(vp);
7906         file_drop(uap->fd);
7907         if (auio) {
7908                 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
7909         } else {
7910                 *retval = (user_ssize_t)attrsize;
7911         }
7912         return (error);
7913 }
7914
7915 /*
7916  * Obtain the full pathname of a file system object by id.
7917  *
7918  * This is a private SPI used by the File Manager.
7919  */
7920 __private_extern__
7921 int
7922 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
7923 {
7924         vnode_t vp;
7925         struct mount *mp = NULL;
7926         vfs_context_t ctx = vfs_context_current();
7927         fsid_t fsid;
7928         char *realpath;
7929         int bpflags;
7930         int length;
7931         int error;
7932
7933         if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
7934                 return (error);
7935         }
7936         AUDIT_ARG(value32, fsid.val[0]);
7937         AUDIT_ARG(value64, uap->objid);
7938         /* Restrict output buffer size for now. */
7939         if (uap->bufsize > PAGE_SIZE) {
7940                 return (EINVAL);
7941         }
7942         MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
7943         if (realpath == NULL) {
7944                 return (ENOMEM);
7945         }
7946         /* Find the target mountpoint. */
7947         if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) {
7948                 error = ENOTSUP;  /* unexpected failure */
7949                 goto out;
7950         }
7951         /* Find the target vnode. */
7952         if (uap->objid == 2) {
7953                 error = VFS_ROOT(mp, &vp, ctx);
7954         } else {
7955                 error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx);
7956         }
7957         vfs_unbusy(mp);
7958         if (error) {
7959                 goto out;
7960         }
7961         /* Obtain the absolute path to this vnode. */
7962         bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
7963         error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
7964         vnode_put(vp);
7965         if (error) {
7966                 goto out;
7967         }
7968         AUDIT_ARG(text, realpath);
7969         error = copyout((caddr_t)realpath, uap->buf, length);
7970
7971         *retval = (user_ssize_t)length; /* may be superseded by error */
7972 out:
7973         if (realpath) {
7974                 FREE(realpath, M_TEMP);
7975         }
7976         return (error);
7977 }
7978
7979 /*
7980  * Common routine to handle various flavors of statfs data heading out
7981  *      to user space.
7982  *
7983  * Returns:     0                       Success
7984  *              EFAULT
7985  */
7986 static int
7987 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
7988     user_addr_t bufp, int *sizep, boolean_t is_64_bit,
7989     boolean_t partial_copy)
7990 {
7991         int             error;
7992         int             my_size, copy_size;
7993
7994         if (is_64_bit) {
7995                 struct user64_statfs sfs;
7996                 my_size = copy_size = sizeof(sfs);
7997                 bzero(&sfs, my_size);
7998                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
7999                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
8000                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
8001                 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
8002                 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
8003                 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
8004                 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
8005                 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
8006                 sfs.f_files = (user64_long_t)sfsp->f_files;
8007                 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
8008                 sfs.f_fsid = sfsp->f_fsid;
8009                 sfs.f_owner = sfsp->f_owner;
8010                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
8011                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
8012                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
8013
8014                 if (partial_copy) {
8015                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
8016                 }
8017                 error = copyout((caddr_t)&sfs, bufp, copy_size);
8018         }
8019         else {
8020                 struct user32_statfs sfs;
8021
8022                 my_size = copy_size = sizeof(sfs);
8023                 bzero(&sfs, my_size);
8024
8025                 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
8026                 sfs.f_type = mp->mnt_vtable->vfc_typenum;
8027                 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
8028
8029                 /*
8030                  * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
8031                  * have to fudge the numbers here in that case.   We inflate the blocksize in order
8032                  * to reflect the filesystem size as best we can.
8033                  */
8034                 if ((sfsp->f_blocks > INT_MAX)
8035                         /* Hack for 4061702 . I think the real fix is for Carbon to
8036                          * look for some volume capability and not depend on hidden
8037                          * semantics agreed between a FS and carbon.
8038                          * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
8039                          * for Carbon to set bNoVolumeSizes volume attribute.
8040                          * Without this the webdavfs files cannot be copied onto
8041                          * disk as they look huge. This change should not affect
8042                          * XSAN as they should not setting these to -1..
8043                          */
8044                          && (sfsp->f_blocks != 0xffffffffffffffffULL)
8045                          && (sfsp->f_bfree != 0xffffffffffffffffULL)
8046                          && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
8047                         int             shift;
8048
8049                         /*
8050                          * Work out how far we have to shift the block count down to make it fit.
8051                          * Note that it's possible to have to shift so far that the resulting
8052                          * blocksize would be unreportably large.  At that point, we will clip
8053                          * any values that don't fit.
8054                          *
8055                          * For safety's sake, we also ensure that f_iosize is never reported as
8056                          * being smaller than f_bsize.
8057                          */
8058                         for (shift = 0; shift < 32; shift++) {
8059                                 if ((sfsp->f_blocks >> shift) <= INT_MAX)
8060                                         break;
8061                                 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
8062                                         break;
8063                         }
8064 #define __SHIFT_OR_CLIP(x, s)   ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
8065                         sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
8066                         sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
8067                         sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
8068 #undef __SHIFT_OR_CLIP
8069                         sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
8070                         sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
8071                 } else {
8072                         /* filesystem is small enough to be reported honestly */
8073                         sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
8074                         sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
8075                         sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
8076                         sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
8077                         sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
8078                 }
8079                 sfs.f_files = (user32_long_t)sfsp->f_files;
8080                 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
8081                 sfs.f_fsid = sfsp->f_fsid;
8082                 sfs.f_owner = sfsp->f_owner;
8083                 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
8084                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
8085                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
8086
8087                 if (partial_copy) {
8088                         copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
8089                 }
8090                 error = copyout((caddr_t)&sfs, bufp, copy_size);
8091         }
8092
8093         if (sizep != NULL) {
8094                 *sizep = my_size;
8095         }
8096         return(error);
8097 }
8098
8099 /*
8100  * copy stat structure into user_stat structure.
8101  */
8102 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
8103 {
8104         bzero(usbp, sizeof(*usbp));
8105
8106         usbp->st_dev = sbp->st_dev;
8107         usbp->st_ino = sbp->st_ino;
8108         usbp->st_mode = sbp->st_mode;
8109         usbp->st_nlink = sbp->st_nlink;
8110         usbp->st_uid = sbp->st_uid;
8111         usbp->st_gid = sbp->st_gid;
8112         usbp->st_rdev = sbp->st_rdev;
8113 #ifndef _POSIX_C_SOURCE
8114         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
8115         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
8116         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
8117         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
8118         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
8119         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
8120 #else
8121         usbp->st_atime = sbp->st_atime;
8122         usbp->st_atimensec = sbp->st_atimensec;
8123         usbp->st_mtime = sbp->st_mtime;
8124         usbp->st_mtimensec = sbp->st_mtimensec;
8125         usbp->st_ctime = sbp->st_ctime;
8126         usbp->st_ctimensec = sbp->st_ctimensec;
8127 #endif
8128         usbp->st_size = sbp->st_size;
8129         usbp->st_blocks = sbp->st_blocks;
8130         usbp->st_blksize = sbp->st_blksize;
8131         usbp->st_flags = sbp->st_flags;
8132         usbp->st_gen = sbp->st_gen;
8133         usbp->st_lspare = sbp->st_lspare;
8134         usbp->st_qspare[0] = sbp->st_qspare[0];
8135         usbp->st_qspare[1] = sbp->st_qspare[1];
8136 }
8137
8138 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
8139 {
8140         bzero(usbp, sizeof(*usbp));
8141
8142         usbp->st_dev = sbp->st_dev;
8143         usbp->st_ino = sbp->st_ino;
8144         usbp->st_mode = sbp->st_mode;
8145         usbp->st_nlink = sbp->st_nlink;
8146         usbp->st_uid = sbp->st_uid;
8147         usbp->st_gid = sbp->st_gid;
8148         usbp->st_rdev = sbp->st_rdev;
8149 #ifndef _POSIX_C_SOURCE
8150         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
8151         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
8152         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
8153         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
8154         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
8155         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
8156 #else
8157         usbp->st_atime = sbp->st_atime;
8158         usbp->st_atimensec = sbp->st_atimensec;
8159         usbp->st_mtime = sbp->st_mtime;
8160         usbp->st_mtimensec = sbp->st_mtimensec;
8161         usbp->st_ctime = sbp->st_ctime;
8162         usbp->st_ctimensec = sbp->st_ctimensec;
8163 #endif
8164         usbp->st_size = sbp->st_size;
8165         usbp->st_blocks = sbp->st_blocks;
8166         usbp->st_blksize = sbp->st_blksize;
8167         usbp->st_flags = sbp->st_flags;
8168         usbp->st_gen = sbp->st_gen;
8169         usbp->st_lspare = sbp->st_lspare;
8170         usbp->st_qspare[0] = sbp->st_qspare[0];
8171         usbp->st_qspare[1] = sbp->st_qspare[1];
8172 }
8173
8174 /*
8175  * copy stat64 structure into user_stat64 structure.
8176  */
8177 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
8178 {
8179         bzero(usbp, sizeof(*usbp));
8180
8181         usbp->st_dev = sbp->st_dev;
8182         usbp->st_ino = sbp->st_ino;
8183         usbp->st_mode = sbp->st_mode;
8184         usbp->st_nlink = sbp->st_nlink;
8185         usbp->st_uid = sbp->st_uid;
8186         usbp->st_gid = sbp->st_gid;
8187         usbp->st_rdev = sbp->st_rdev;
8188 #ifndef _POSIX_C_SOURCE
8189         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
8190         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
8191         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
8192         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
8193         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
8194         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
8195         usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
8196         usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
8197 #else
8198         usbp->st_atime = sbp->st_atime;
8199         usbp->st_atimensec = sbp->st_atimensec;
8200         usbp->st_mtime = sbp->st_mtime;
8201         usbp->st_mtimensec = sbp->st_mtimensec;
8202         usbp->st_ctime = sbp->st_ctime;
8203         usbp->st_ctimensec = sbp->st_ctimensec;
8204         usbp->st_birthtime = sbp->st_birthtime;
8205         usbp->st_birthtimensec = sbp->st_birthtimensec;
8206 #endif
8207         usbp->st_size = sbp->st_size;
8208         usbp->st_blocks = sbp->st_blocks;
8209         usbp->st_blksize = sbp->st_blksize;
8210         usbp->st_flags = sbp->st_flags;
8211         usbp->st_gen = sbp->st_gen;
8212         usbp->st_lspare = sbp->st_lspare;
8213         usbp->st_qspare[0] = sbp->st_qspare[0];
8214         usbp->st_qspare[1] = sbp->st_qspare[1];
8215 }
8216
8217 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
8218 {
8219         bzero(usbp, sizeof(*usbp));
8220
8221         usbp->st_dev = sbp->st_dev;
8222         usbp->st_ino = sbp->st_ino;
8223         usbp->st_mode = sbp->st_mode;
8224         usbp->st_nlink = sbp->st_nlink;
8225         usbp->st_uid = sbp->st_uid;
8226         usbp->st_gid = sbp->st_gid;
8227         usbp->st_rdev = sbp->st_rdev;
8228 #ifndef _POSIX_C_SOURCE
8229         usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
8230         usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
8231         usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
8232         usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
8233         usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
8234         usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
8235         usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
8236         usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
8237 #else
8238         usbp->st_atime = sbp->st_atime;
8239         usbp->st_atimensec = sbp->st_atimensec;
8240         usbp->st_mtime = sbp->st_mtime;
8241         usbp->st_mtimensec = sbp->st_mtimensec;
8242         usbp->st_ctime = sbp->st_ctime;
8243         usbp->st_ctimensec = sbp->st_ctimensec;
8244         usbp->st_birthtime = sbp->st_birthtime;
8245         usbp->st_birthtimensec = sbp->st_birthtimensec;
8246 #endif
8247         usbp->st_size = sbp->st_size;
8248         usbp->st_blocks = sbp->st_blocks;
8249         usbp->st_blksize = sbp->st_blksize;
8250         usbp->st_flags = sbp->st_flags;
8251         usbp->st_gen = sbp->st_gen;
8252         usbp->st_lspare = sbp->st_lspare;
8253         usbp->st_qspare[0] = sbp->st_qspare[0];
8254         usbp->st_qspare[1] = sbp->st_qspare[1];
8255 }