xnu-2782.20.48.tar.gz

[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c

index 23653799fbcec651a7849d6e7822fe97b0b0914e..ba37a4e38abddf71ed2b369c4e70e114928dc11e 100644 (file)
--- a/bsd/vfs/vfs_syscalls.c
+++ b/bsd/vfs/vfs_syscalls.c
@@ -1,5 +1,5 @@
  /*
  /*
- * Copyright (c) 1995-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1995-2014 Apple Inc. All rights reserved.
   *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
   *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
@@ -92,6 +92,7 @@
  #include <sys/quota.h>
  #include <sys/kdebug.h>
  #include <sys/fsevents.h>
  #include <sys/quota.h>
  #include <sys/kdebug.h>
  #include <sys/fsevents.h>
+#include <sys/imgsrc.h>
  #include <sys/sysproto.h>
  #include <sys/xattr.h>
  #include <sys/fcntl.h>
  #include <sys/sysproto.h>
  #include <sys/xattr.h>
  #include <sys/fcntl.h>
@@ -101,7 +102,6 @@
  #include <machine/cons.h>
  #include <machine/limits.h>
  #include <miscfs/specfs/specdev.h>
  #include <machine/cons.h>
  #include <machine/limits.h>
  #include <miscfs/specfs/specdev.h>
-#include <miscfs/union/union.h>
  
  #include <security/audit/audit.h>
  #include <bsm/audit_kevents.h>
  
  #include <security/audit/audit.h>
  #include <bsm/audit_kevents.h>
@@ -109,6 +109,7 @@
  #include <mach/mach_types.h>
  #include <kern/kern_types.h>
  #include <kern/kalloc.h>
  #include <mach/mach_types.h>
  #include <kern/kern_types.h>
  #include <kern/kalloc.h>
+#include <kern/task.h>
  
  #include <vm/vm_pageout.h>
  
  
  #include <vm/vm_pageout.h>
  
@@ -147,49 +148,48 @@ static int getfsstat_callback(mount_t mp, void * arg);
  static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
  static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
  static int sync_callback(mount_t, void *);
  static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
  static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
  static int sync_callback(mount_t, void *);
+static void sync_thread(void *, __unused wait_result_t);
+static int sync_async(int);
  static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, 
                         user_addr_t bufp, int *sizep, boolean_t is_64_bit, 
                                                 boolean_t partial_copy);
  static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
                         user_addr_t bufp);
  static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
  static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, 
                         user_addr_t bufp, int *sizep, boolean_t is_64_bit, 
                                                 boolean_t partial_copy);
  static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
                         user_addr_t bufp);
  static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
+static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
+                        struct componentname *cnp, user_addr_t fsmountargs,
+                        int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
+                        vfs_context_t ctx);
+void vfs_notify_mount(vnode_t pdvp);
+
+int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
+
+struct fd_vn_data * fg_vn_data_alloc(void);
+
+static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
+
+static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
+
+#ifdef CONFIG_IMGSRC_ACCESS
+static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
+static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
+static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
+static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
+static void mount_end_update(mount_t mp);
+static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
+#endif /* CONFIG_IMGSRC_ACCESS */
+
  int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
  
  __private_extern__
  int sync_internal(void);
  
  int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
  
  __private_extern__
  int sync_internal(void);
  
-__private_extern__
-int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *);
-
  __private_extern__
  int unlink1(vfs_context_t, struct nameidata *, int);
  
  __private_extern__
  int unlink1(vfs_context_t, struct nameidata *, int);
  
-
-#ifdef __APPLE_API_OBSOLETE
-struct fstatv_args {
-       int fd;                 /* file descriptor of the target file */
-       struct vstat *vsb;      /* vstat structure for returned info  */
-};
-struct lstatv_args {
-       const char *path;       /* pathname of the target file       */
-       struct vstat *vsb;      /* vstat structure for returned info */
-};
-struct mkcomplex_args {
-        const char *path;      /* pathname of the file to be created */
-               mode_t mode;            /* access mode for the newly created file */
-        u_int32_t type;                /* format of the complex file */
-};
-struct statv_args {
-        const char *path;      /* pathname of the target file       */
-        struct vstat *vsb;     /* vstat structure for returned info */
-};
-
-int fstatv(proc_t p, struct fstatv_args *uap, int32_t *retval);
-int lstatv(proc_t p, struct lstatv_args *uap, int32_t *retval);
-int mkcomplex(proc_t p, struct mkcomplex_args *uap, int32_t *retval);
-int statv(proc_t p, struct statv_args *uap, int32_t *retval);
-
-#endif /* __APPLE_API_OBSOLETE */
+extern lck_grp_t *fd_vn_lck_grp;
+extern lck_grp_attr_t *fd_vn_lck_grp_attr;
+extern lck_attr_t *fd_vn_lck_attr;
  
  /*
   * incremented each time a mount or unmount operation occurs
  
  /*
   * incremented each time a mount or unmount operation occurs
@@ -201,14 +201,76 @@ uint32_t mount_generation = 0;
  /* counts number of mount and unmount operations */
  unsigned int vfs_nummntops=0;
  
  /* counts number of mount and unmount operations */
  unsigned int vfs_nummntops=0;
  
-extern struct fileops vnops;
+extern const struct fileops vnops;
+#if CONFIG_APPLEDOUBLE
  extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *); 
  extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *); 
+#endif /* CONFIG_APPLEDOUBLE */
  
  
+typedef uint32_t vfs_rename_flags_t;	/* 32-bit flag word; NOTE(review): its users are not visible in this hunk */
+#if CONFIG_SECLUDED_RENAME
+enum {
+       VFS_SECLUDE_RENAME              = 0x00000001	/* only defined when CONFIG_SECLUDED_RENAME is set; presumably a vfs_rename_flags_t bit -- confirm */
+};
+#endif
  
  /*
   * Virtual File System System Calls
   */
  
  
  /*
   * Virtual File System System Calls
   */
  
+#if NFSCLIENT || DEVFS
+/*
+ * Private in-kernel mounting SPI (not exported).
+ * NOTE(review): this comment used to say "NFS only", but the enclosing
+ * conditional builds this code when either NFSCLIENT or DEVFS is set.
+ *
+ * vfs_iskernelmount:
+ *     Returns TRUE if MNTK_KERNEL_MOUNT is set in mp->mnt_kern_flag,
+ *     FALSE otherwise.  mp is dereferenced without a NULL check.
+ */
+ __private_extern__
+boolean_t
+vfs_iskernelmount(mount_t mp)
+{
+       return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
+}
+
+ __private_extern__
+int
+kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
+             void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
+{      /*
+        * kernel_mount: mount a file system of type `fstype` from inside the
+        * kernel by handing off to mount_common() with kernelmount == TRUE
+        * and no MAC label string.
+        *
+        *  fstype         file system type name
+        *  pvp, vp        parent vnode and vnode to be covered; when vp is
+        *                 NULLVP both are obtained by looking up `path`
+        *  path           kernel-space path of the mount point
+        *  data           file system specific mount data
+        *  datalen        not used
+        *  syscall_flags  passed to mount_common() as its generic flags
+        *  kern_flags     passed to mount_common() as internal_flags
+        *                 NOTE(review): declared __unused although it is used
+        *  ctx            context used for the lookup and the mount
+        *
+        * Returns the namei() error if the lookup fails, otherwise whatever
+        * mount_common() returns.
+        */
+       struct nameidata nd;
+       boolean_t did_namei;	/* TRUE when this function did the lookup and must undo it */
+       int error;
+
+       /* nd is initialized on both paths because &nd.ni_cnd is handed to mount_common() */
+       NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT, 
+              UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
+
+       /*
+        * Get the vnode to be covered if it's not supplied
+        */
+       if (vp == NULLVP) {
+               error = namei(&nd);
+               if (error)
+                       return (error);
+               vp = nd.ni_vp;
+               pvp = nd.ni_dvp;	/* any caller-supplied pvp is replaced by the looked-up parent */
+               did_namei = TRUE;
+       } else {
+               /* no lookup: point the component name's path buffer straight at `path` */
+               char *pnbuf = CAST_DOWN(char *, path);
+
+               nd.ni_cnd.cn_pnbuf = pnbuf;
+               nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;	/* length includes the terminating NUL */
+               did_namei = FALSE;
+       }
+
+       error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
+                            syscall_flags, kern_flags, NULL, TRUE, ctx);
+
+       /* release only what the lookup above acquired; caller-supplied vnodes are left alone */
+       if (did_namei) {
+               vnode_put(vp);
+               vnode_put(pvp);
+               nameidone(&nd);
+       }
+
+       return (error);
+}
+#endif /* NFSCLIENT || DEVFS */
+
  /*
   * Mount a file system.
   */
  /*
   * Mount a file system.
   */
@@ -226,6 +288,13 @@ mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
         return (__mac_mount(p, &muap, retval));
  }
  
         return (__mac_mount(p, &muap, retval));
  }
  
+void
+vfs_notify_mount(vnode_t pdvp) 
+{      /*
+        * Announce a mount: signal a VQ_MOUNT VFS event (passing NULL as the
+        * first argument and 0 as the data), then post NOTE_WRITE on pdvp
+        * through lock_vnode_and_post().
+        * NOTE(review): pdvp is presumably the parent directory of the new
+        * mount point -- no caller is visible here, confirm.
+        */
+       vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
+       lock_vnode_and_post(pdvp, NOTE_WRITE);
+}
+
  /*
   * __mac_mount:
   *     Mount a file system taking into account MAC label behavior.
  /*
   * __mac_mount:
   *     Mount a file system taking into account MAC label behavior.
@@ -245,10 +314,174 @@ mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
   * Returns:        0                       Success
   *                !0                       Not success
   */
   * Returns:        0                       Success
   *                !0                       Not success
   */
+boolean_t root_fs_upgrade_try = FALSE;
+
  int
  __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
  {
  int
  __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
  {
-       struct vnode *vp, *pvp;
+       vnode_t pvp = NULL;	/* parent of the covered vnode; stays NULL if the lookup fails */
+       vnode_t vp = NULL;	/* vnode to be covered; stays NULL if the lookup fails */
+       int need_nameidone = 0;	/* set once namei() succeeded, so cleanup calls nameidone() */
+       vfs_context_t ctx = vfs_context_current();
+       char fstypename[MFSNAMELEN];
+       struct nameidata nd;
+       size_t dummy=0;	/* receives the copied length from copyinstr(); not otherwise used */
+       char *labelstr = NULL;	/* MAC label string; allocated only in the CONFIG_MACF path */
+       int flags = uap->flags;	/* local copy: adjusted below, uap->flags itself is not modified */
+       int error;
+#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF 
+       boolean_t is_64bit = IS_64BIT_PROCESS(p);
+#else
+#pragma unused(p)
+#endif
+       /*
+        * Get the fs type name from user space
+        */
+       error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
+       if (error)
+               return (error);	/* nothing acquired yet, so return directly instead of going to out */
+
+       /*
+        * Get the vnode to be covered
+        */
+       NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT, 
+              UIO_USERSPACE, uap->path, ctx);
+       error = namei(&nd);
+       if (error) {
+               goto out;
+       }
+       need_nameidone = 1;
+       vp = nd.ni_vp;
+       pvp = nd.ni_dvp;
+       
+#ifdef CONFIG_IMGSRC_ACCESS
+       /* Mounting image source cannot be batched with other operations */
+       if (flags == MNT_IMGSRC_BY_INDEX) {
+               /* NOTE(review): the last argument repeats the enclosing test, so it is always TRUE here */
+               error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
+                                                 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
+               goto out;
+       }
+#endif /* CONFIG_IMGSRC_ACCESS */
+
+#if CONFIG_MACF
+       /*
+        * Get the label string (if any) from user space
+        */
+       if (uap->mac_p != USER_ADDR_NULL) {
+               struct user_mac mac;
+               size_t ulen = 0;
+
+               /*
+                * The user structure is copied in using the layout that matches
+                * the caller's word size.  The fields are copied into mac before
+                * error is examined; they are not used if copyin() failed because
+                * of the error check that follows.
+                */
+               if (is_64bit) {
+                       struct user64_mac mac64;
+                       error = copyin(uap->mac_p, &mac64, sizeof(mac64));
+                       mac.m_buflen = mac64.m_buflen;
+                       mac.m_string = mac64.m_string;
+               } else {
+                       struct user32_mac mac32;
+                       error = copyin(uap->mac_p, &mac32, sizeof(mac32));
+                       mac.m_buflen = mac32.m_buflen;
+                       mac.m_string = mac32.m_string;
+               }
+               if (error)
+                       goto out;
+               /* accept only buffer lengths in the range [2, MAC_MAX_LABEL_BUF_LEN] */
+               if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
+                   (mac.m_buflen < 2)) {
+                       error = EINVAL;
+                       goto out;
+               }
+               MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
+               error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
+               if (error) {
+                       goto out;	/* labelstr is freed at out */
+               }
+               AUDIT_ARG(mac_string, labelstr);
+       }
+#endif /* CONFIG_MACF */
+
+       AUDIT_ARG(fflags, flags);
+
+       /* the covered vnode is the root vnode of the root file system */
+       if ((vp->v_flag & VROOT) &&
+                       (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
+               if (!(flags & MNT_UNION)) {
+                       flags |= MNT_UPDATE;
+               }
+               else {
+                       /* 
+                        * For a union mount on '/', treat it as a fresh
+                        * mount instead of an update. 
+                        * Otherwise, union mounting on '/' used to panic the 
+                        * system, since mnt_vnodecovered was found to 
+                        * be NULL for '/', which is required for unionlookup 
+                        * after it gets ENOENT on a union mount.
+                        */
+                       flags = (flags & ~(MNT_UPDATE));
+               }
+
+#ifdef SECURE_KERNEL
+               if ((flags & MNT_RDONLY) == 0) {
+                       /* Release kernels are not allowed to mount "/" as rw */
+                       error = EPERM;
+                       goto out;       
+               }
+#endif
+               /*
+                * See 7392553 for more details on why this check exists.
+                * Suffice it to say: if this check is ON and something tries
+                * to mount the rootFS RW, we'll turn off the codesign
+                * bitmap optimization.  
+                */        
+#if CHECK_CS_VALIDATION_BITMAP
+               if ((flags & MNT_RDONLY) == 0 ) {
+                       root_fs_upgrade_try = TRUE;
+               }
+#endif
+       }
+
+       /* user-initiated mount: internal_flags is 0 and kernelmount is FALSE */
+       error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
+                            labelstr, FALSE, ctx);
+
+out:
+
+#if CONFIG_MACF
+       if (labelstr)
+               FREE(labelstr, M_MACTEMP);
+#endif /* CONFIG_MACF */
+
+       /* vp and pvp are non-NULL only after a successful namei() */
+       if (vp) {
+               vnode_put(vp);
+       }
+       if (pvp) {
+               vnode_put(pvp);
+       }
+       if (need_nameidone) {
+               nameidone(&nd);
+       }
+
+       return (error);
+}
+
+/*
+ * common mount implementation (final stage of mounting)
+ *
+ * Arguments:
+ *  fstypename     file system type (i.e. its vfs name)
+ *  pvp            parent of covered vnode
+ *  vp             covered vnode
+ *  cnp            component name (i.e. path) of covered vnode
+ *  flags          generic mount flags
+ *  internal_flags kernel-internal mount flags (KERNEL_MOUNT_*)
+ *  fsmountargs    file system specific data
+ *  labelstr       optional MAC label
+ *  kernelmount    TRUE for mounts initiated from inside the kernel
+ *  ctx            caller's context
+ */
+static int
+mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
+             struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
+             char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
+{
+#if !CONFIG_MACF
+#pragma unused(labelstr)
+#endif
         struct vnode *devvp = NULLVP;
         struct vnode *device_vnode = NULLVP;
  #if CONFIG_MACF
         struct vnode *devvp = NULLVP;
         struct vnode *device_vnode = NULLVP;
  #if CONFIG_MACF
@@ -256,48 +489,20 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
  #endif
         struct mount *mp;
         struct vfstable *vfsp = (struct vfstable *)0;
  #endif
         struct mount *mp;
         struct vfstable *vfsp = (struct vfstable *)0;
+       struct proc *p = vfs_context_proc(ctx);
         int error, flag = 0;
         int error, flag = 0;
-       struct vnode_attr va;
-       vfs_context_t ctx = vfs_context_current();
-       struct nameidata nd;
-       struct nameidata nd1;
-       char fstypename[MFSNAMELEN];
-       size_t dummy=0;
         user_addr_t devpath = USER_ADDR_NULL;
         user_addr_t devpath = USER_ADDR_NULL;
-       user_addr_t fsmountargs =  uap->data;
         int ronly = 0;
         int mntalloc = 0;
         boolean_t vfsp_ref = FALSE;
         int ronly = 0;
         int mntalloc = 0;
         boolean_t vfsp_ref = FALSE;
-       mode_t accessmode;
-       boolean_t is_64bit;
         boolean_t is_rwlock_locked = FALSE;
         boolean_t did_rele = FALSE;
         boolean_t have_usecount = FALSE;
  
         boolean_t is_rwlock_locked = FALSE;
         boolean_t did_rele = FALSE;
         boolean_t have_usecount = FALSE;
  
-       AUDIT_ARG(fflags, uap->flags);
-
-       is_64bit = proc_is64bit(p);
-
         /*
         /*
-        * Get vnode to be covered
+        * Process an update for an existing mount
          */
          */
-       NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT, 
-                  UIO_USERSPACE, uap->path, ctx);
-       error = namei(&nd);
-       if (error)
-               return (error);
-       vp = nd.ni_vp;
-       pvp = nd.ni_dvp;
-       
-       if ((vp->v_flag & VROOT) &&
-               (vp->v_mount->mnt_flag & MNT_ROOTFS)) 
-                       uap->flags |= MNT_UPDATE;
-
-       error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
-       if (error)
-               goto out1;
-       
-       if (uap->flags & MNT_UPDATE) {
+       if (flags & MNT_UPDATE) {
                 if ((vp->v_flag & VROOT) == 0) {
                         error = EINVAL;
                         goto out1;
                 if ((vp->v_flag & VROOT) == 0) {
                         error = EINVAL;
                         goto out1;
@@ -318,11 +523,31 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
                  * We only allow the filesystem to be reloaded if it
                  * is currently mounted read-only.
                  */
                  * We only allow the filesystem to be reloaded if it
                  * is currently mounted read-only.
                  */
-               if ((uap->flags & MNT_RELOAD) &&
+               if ((flags & MNT_RELOAD) &&
                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
                         error = ENOTSUP;
                         goto out1;
                 }
                     ((mp->mnt_flag & MNT_RDONLY) == 0)) {
                         error = ENOTSUP;
                         goto out1;
                 }
+
+               /*
+                * If content protection is enabled, update mounts are not
+                * allowed to turn it off.
+                */
+               if ((mp->mnt_flag & MNT_CPROTECT) && 
+                          ((flags & MNT_CPROTECT) == 0)) {
+                       error = EINVAL;
+                       goto out1;
+               }
+
+#ifdef CONFIG_IMGSRC_ACCESS 
+               /* Can't downgrade the backer of the root FS */
+               if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
+                       (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
+                       error = ENOTSUP;
+                       goto out1;
+               }
+#endif /* CONFIG_IMGSRC_ACCESS */
+
                 /*
                  * Only root, or the user that did the original mount is
                  * permitted to update it.
                 /*
                  * Only root, or the user that did the original mount is
                  * permitted to update it.
@@ -334,7 +559,6 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
  #if CONFIG_MACF
                 error = mac_mount_check_remount(ctx, mp);
                 if (error != 0) {
  #if CONFIG_MACF
                 error = mac_mount_check_remount(ctx, mp);
                 if (error != 0) {
-                       lck_rw_done(&mp->mnt_rwlock);
                         goto out1;
                 }
  #endif
                         goto out1;
                 }
  #endif
@@ -342,48 +566,28 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
                  * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
                  * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
                  */
                  * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
                  * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
                  */
-               if (suser(vfs_context_ucred(ctx), NULL)) {
-                       uap->flags |= MNT_NOSUID | MNT_NODEV;
+               if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
+                       flags |= MNT_NOSUID | MNT_NODEV;
                         if (mp->mnt_flag & MNT_NOEXEC)
                         if (mp->mnt_flag & MNT_NOEXEC)
-                               uap->flags |= MNT_NOEXEC;
+                               flags |= MNT_NOEXEC;
                 }
                 flag = mp->mnt_flag;
  
                 }
                 flag = mp->mnt_flag;
  
-               mp->mnt_flag |=
-                   uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+
+
+               mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
  
                 vfsp = mp->mnt_vtable;
                 goto update;
         }
  
                 vfsp = mp->mnt_vtable;
                 goto update;
         }
-       /*
-        * If the user is not root, ensure that they own the directory
-        * onto which we are attempting to mount.
-        */
-       VATTR_INIT(&va);
-       VATTR_WANTED(&va, va_uid);
-       if ((error = vnode_getattr(vp, &va, ctx)) ||
-           (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
-            (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))) {
-               goto out1;
-       }
         /*
          * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
          * MNT_NOEXEC if mount point is already MNT_NOEXEC.
          */
         /*
          * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
          * MNT_NOEXEC if mount point is already MNT_NOEXEC.
          */
-       if (suser(vfs_context_ucred(ctx), NULL)) {
-               uap->flags |= MNT_NOSUID | MNT_NODEV;
+       if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
+               flags |= MNT_NOSUID | MNT_NODEV;
                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
                 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
-                       uap->flags |= MNT_NOEXEC;
-       }
-       if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
-               goto out1;
-
-       if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
-               goto out1;
-
-       if (vp->v_type != VDIR) {
-               error = ENOTDIR;
-               goto out1;
+                       flags |= MNT_NOEXEC;
         }
  
         /* XXXAUDIT: Should we capture the type on the error path as well? */
         }
  
         /* XXXAUDIT: Should we capture the type on the error path as well? */
@@ -400,22 +604,22 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
                 error = ENODEV;
                 goto out1;
         }
                 error = ENODEV;
                 goto out1;
         }
-#if CONFIG_MACF
-       error = mac_mount_check_mount(ctx, vp,
-           &nd.ni_cnd, vfsp->vfc_name);
-       if (error != 0)
+
+       /*
+        * VFC_VFSLOCALARGS is not currently supported for kernel mounts
+        */
+       if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
+               error = EINVAL;  /* unsupported request */
                 goto out1;
                 goto out1;
-#endif
-       if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
-               error = EBUSY;
+       }
+
+       error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
+       if (error != 0) {
                 goto out1;
         }
                 goto out1;
         }
-       vnode_lock_spin(vp);
-       SET(vp->v_flag, VMOUNT);
-       vnode_unlock(vp);
  
         /*
  
         /*
-        * Allocate and initialize the filesystem.
+        * Allocate and initialize the filesystem (mount_t)
          */
         MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
                 M_MOUNT, M_WAITOK);
          */
         MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
                 M_MOUNT, M_WAITOK);
@@ -445,34 +649,51 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
         mp->mnt_vtable = vfsp;
         //mp->mnt_stat.f_type = vfsp->vfc_typenum;
         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
         mp->mnt_vtable = vfsp;
         //mp->mnt_stat.f_type = vfsp->vfc_typenum;
         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
-       strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
-       strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
+       strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
+       strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
         mp->mnt_vnodecovered = vp;
         mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
         mp->mnt_vnodecovered = vp;
         mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
-       mp->mnt_devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
+       mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
+       mp->mnt_devbsdunit = 0;
  
         /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
         vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
  
         /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
         vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
-       
+
+#if NFSCLIENT || DEVFS
+       if (kernelmount)
+               mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
+       if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
+               mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
+#endif /* NFSCLIENT || DEVFS */
+
  update:
         /*
          * Set the mount level flags.
          */
  update:
         /*
          * Set the mount level flags.
          */
-       if (uap->flags & MNT_RDONLY)
+       if (flags & MNT_RDONLY)
                 mp->mnt_flag |= MNT_RDONLY;
                 mp->mnt_flag |= MNT_RDONLY;
-       else if (mp->mnt_flag & MNT_RDONLY)
+       else if (mp->mnt_flag & MNT_RDONLY) {
+               // disallow read/write upgrades of file systems that
+               // had the TYPENAME_OVERRIDE feature set.
+               if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
+                       error = EPERM;
+                       goto out1;
+               }
                 mp->mnt_kern_flag |= MNTK_WANTRDWR;
                 mp->mnt_kern_flag |= MNTK_WANTRDWR;
+       }
         mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
                           MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
         mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
                           MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
-                         MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
-                         MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
-       mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
-                                     MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
-                                     MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED | 
-                                         MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
+                         MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
+                         MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
+                         MNT_QUARANTINE | MNT_CPROTECT);
+       mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+                                MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
+                                MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
+                                MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
+                                MNT_QUARANTINE | MNT_CPROTECT);
  
  #if CONFIG_MACF
  
  #if CONFIG_MACF
-       if (uap->flags & MNT_MULTILABEL) {
+       if (flags & MNT_MULTILABEL) {
                 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
                         error = EINVAL;
                         goto out1;
                 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
                         error = EINVAL;
                         goto out1;
@@ -480,9 +701,11 @@ update:
                 mp->mnt_flag |= MNT_MULTILABEL;
         }
  #endif
                 mp->mnt_flag |= MNT_MULTILABEL;
         }
  #endif
-
+       /*
+        * Process device path for local file systems if requested
+        */
         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
         if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
-               if (is_64bit) {
+               if (vfs_context_is64bit(ctx)) {
                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
                                 goto out1;      
                         fsmountargs += sizeof(devpath);
                         if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
                                 goto out1;      
                         fsmountargs += sizeof(devpath);
@@ -495,16 +718,18 @@ update:
                         fsmountargs += sizeof(tmp);
                 }
  
                         fsmountargs += sizeof(tmp);
                 }
  
-               /* if it is not update and device name needs to be parsed */
+               /* Lookup device and authorize access to it */
                 if ((devpath)) {
                 if ((devpath)) {
-                       NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
-                       if ( (error = namei(&nd1)) )
+                       struct nameidata nd;
+
+                       NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
+                       if ( (error = namei(&nd)) )
                                 goto out1;
  
                                 goto out1;
  
-                       strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN);
-                       devvp = nd1.ni_vp;
+                       strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
+                       devvp = nd.ni_vp;
  
  
-                       nameidone(&nd1);
+                       nameidone(&nd);
  
                         if (devvp->v_type != VBLK) {
                                 error = ENOTBLK;
  
                         if (devvp->v_type != VBLK) {
                                 error = ENOTBLK;
@@ -519,14 +744,16 @@ update:
                         * permissions on the device.
                         */
                         if (suser(vfs_context_ucred(ctx), NULL) != 0) {
                         * permissions on the device.
                         */
                         if (suser(vfs_context_ucred(ctx), NULL) != 0) {
-                               accessmode = KAUTH_VNODE_READ_DATA;
+                               mode_t accessmode = KAUTH_VNODE_READ_DATA;
+
                                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
                                         accessmode |= KAUTH_VNODE_WRITE_DATA;
                                 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
                                         goto out2;
                         }
                 }
                                 if ((mp->mnt_flag & MNT_RDONLY) == 0)
                                         accessmode |= KAUTH_VNODE_WRITE_DATA;
                                 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
                                         goto out2;
                         }
                 }
-               if (devpath && ((uap->flags & MNT_UPDATE) == 0)) {
+               /* On first mount, preflight and open device */
+               if (devpath && ((flags & MNT_UPDATE) == 0)) {
                         if ( (error = vnode_ref(devvp)) )
                                 goto out2;
                         /*
                         if ( (error = vnode_ref(devvp)) )
                                 goto out2;
                         /*
@@ -562,114 +789,75 @@ update:
  
                         mp->mnt_devvp = devvp;
                         device_vnode = devvp;
-               } else {
-                       if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
-                               dev_t dev;
-                               int maj;
-                               /*
-                                * If upgrade to read-write by non-root, then verify
-                                * that user has necessary permissions on the device.
-                                */
-                                device_vnode = mp->mnt_devvp;
  
-                               if (device_vnode) {
-                                       vnode_getalways(device_vnode);
-
-                                       if (suser(vfs_context_ucred(ctx), NULL)) {
-                                               if ((error = vnode_authorize(device_vnode, NULL, 
-                                                                               KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) {
-                                                       vnode_put(device_vnode);
-                                                       goto out2;
-                                               }
-                                       }
+               } else if ((mp->mnt_flag & MNT_RDONLY) &&
+                          (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
+                          (device_vnode = mp->mnt_devvp)) {
+                       dev_t dev;
+                       int maj;
+                       /*
+                        * If upgrade to read-write by non-root, then verify
+                        * that user has necessary permissions on the device.
+                        */
+                       vnode_getalways(device_vnode);
  
-                                       /* Tell the device that we're upgrading */
-                                       dev = (dev_t)device_vnode->v_rdev;
-                                       maj = major(dev);
+                       if (suser(vfs_context_ucred(ctx), NULL) &&
+                           (error = vnode_authorize(device_vnode, NULL, 
+                            KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
+                            ctx)) != 0) {
+                               vnode_put(device_vnode);
+                               goto out2;
+                       }
  
-                                       if ((u_int)maj >= (u_int)nblkdev)
-                                               panic("Volume mounted on a device with invalid major number.\n");
+                       /* Tell the device that we're upgrading */
+                       dev = (dev_t)device_vnode->v_rdev;
+                       maj = major(dev);
  
-                                       error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
+                       if ((u_int)maj >= (u_int)nblkdev)
+                               panic("Volume mounted on a device with invalid major number.");
  
-                                       vnode_put(device_vnode);
-                                       if (error != 0) {
-                                               goto out2;
-                                       }
-                               }
-                       }
+                       error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
+                       vnode_put(device_vnode);
                         device_vnode = NULLVP;
+                       if (error != 0) {
+                               goto out2;
+                       }
                 }
         }
  #if CONFIG_MACF
-       if ((uap->flags & MNT_UPDATE) == 0) {
+       if ((flags & MNT_UPDATE) == 0) {
                 mac_mount_label_init(mp);
                 mac_mount_label_associate(ctx, mp);
         }
-       if (uap->mac_p != USER_ADDR_NULL) {
-               struct user_mac mac;
-               char *labelstr = NULL;
-               size_t ulen = 0;
-
-               if ((uap->flags & MNT_UPDATE) != 0) {
-                       error = mac_mount_check_label_update(
-                           ctx, mp);
+       if (labelstr) {
+               if ((flags & MNT_UPDATE) != 0) {
+                       error = mac_mount_check_label_update(ctx, mp);
                         if (error != 0)
                                 goto out3;
                 }
-               if (is_64bit) {
-                       error = copyin(uap->mac_p, &mac, sizeof(mac));
-               } else {
-                       struct mac mac32;
-                       error = copyin(uap->mac_p, &mac32, sizeof(mac32));
-                       mac.m_buflen = mac32.m_buflen;
-                       mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
-               }
-               if (error != 0)
-                       goto out3;
-               if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
-                   (mac.m_buflen < 2)) {
-                       error = EINVAL;
-                       goto out3;
-               }
-               MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
-               error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
-               if (error != 0) {
-                       FREE(labelstr, M_MACTEMP);
-                       goto out3;
-               }
-               AUDIT_ARG(mac_string, labelstr);
-               error = mac_mount_label_internalize(mp->mnt_mntlabel, labelstr);
-               FREE(labelstr, M_MACTEMP);
-               if (error != 0)
-                       goto out3;
         }
  #endif
-       if (device_vnode != NULL) {
-               VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&mp->mnt_devbsdunit, 0, NULL);
-               mp->mnt_devbsdunit %= LOWPRI_MAX_NUM_DEV;
-       }
-
         /*
          * Mount the filesystem.
          */
         error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
  
-       if (uap->flags & MNT_UPDATE) {
+       if (flags & MNT_UPDATE) {
                 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
                         mp->mnt_flag &= ~MNT_RDONLY;
                 mp->mnt_flag &=~
                     (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
                 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
                 if (error)
-                       mp->mnt_flag = flag;
+                       mp->mnt_flag = flag;  /* restore flag value */
                 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
                 lck_rw_done(&mp->mnt_rwlock);
                 is_rwlock_locked = FALSE;
                 if (!error)
                         enablequotas(mp, ctx);
-               goto out2;
+               goto exit;
         }
+
         /*
          * Put the new filesystem on the mount list after root.
          */
@@ -728,11 +916,14 @@ update:
                  */
                 (void)VFS_START(mp, 0, ctx);
  
-               error = mount_list_add(mp);
-               if (error != 0) {
+               if (mount_list_add(mp) != 0) {
+                       /*
+                        * The system is shutting down trying to umount
+                        * everything, so fail with a plausible errno.
+                        */
+                       error = EBUSY;
                         goto out4;
                 }
-
                 lck_rw_done(&mp->mnt_rwlock);
                 is_rwlock_locked = FALSE;
  
@@ -785,8 +976,14 @@ update:
                 } 
  
                 /* Now that mount is setup, notify the listeners */
-               vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
+               vfs_notify_mount(pvp);
         } else {
+               /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
+               if (mp->mnt_vnodelist.tqh_first != NULL) {
+                       panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.", 
+                                       mp->mnt_vtable->vfc_name, error);
+               }
+
                 vnode_lock_spin(vp);
                 CLR(vp->v_flag, VMOUNT);
                 vnode_unlock(vp);
@@ -800,45 +997,60 @@ update:
                 }
                 lck_rw_done(&mp->mnt_rwlock);
                 is_rwlock_locked = FALSE;
+               
+               /*
+                * if we get here, we have a mount structure that needs to be freed,
+                * but since the coveredvp hasn't yet been updated to point at it,
+                * no need to worry about other threads holding a crossref on this mp
+                * so it's ok to just free it
+                */
                 mount_lock_destroy(mp);
  #if CONFIG_MACF
                 mac_mount_label_destroy(mp);
  #endif
                 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
         }
-       nameidone(&nd);
-
+exit:
         /*
-        * drop I/O count on covered 'vp' and
-        * on the device vp if there was one
+        * drop I/O count on the device vp if there was one
          */
         if (devpath && devvp)
                 vnode_put(devvp);
-       vnode_put(vp);
-
-       /* Note that we've changed something in the parent directory */
-       post_event_if_success(pvp, error, NOTE_WRITE);
-       vnode_put(pvp);
  
         return(error);
  
+/* Error condition exits */
  out4:
         (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
+       
+       /* 
+        * If the mount has been placed on the covered vp,
+        * it may have been discovered by now, so we have
+        * to treat this just like an unmount
+        */
+       mount_lock_spin(mp);
+       mp->mnt_lflag |= MNT_LDEAD;
+       mount_unlock(mp);
+
         if (device_vnode != NULLVP) {
                 vnode_rele(device_vnode);
                 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
                         ctx);
                 did_rele = TRUE;
         }
+
         vnode_lock_spin(vp);
+
+       mp->mnt_crossref++;
         vp->v_mountedhere = (mount_t) 0;
+
         vnode_unlock(vp);
-       
+
         if (have_usecount) {
                 vnode_rele(vp);
         }
  out3:
-       if (devpath && ((uap->flags & MNT_UPDATE) == 0) && (!did_rele))
+       if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
                 vnode_rele(devvp);
  out2:
         if (devpath && devvp)
@@ -848,295 +1060,180 @@ out1:
         if (is_rwlock_locked == TRUE) {
                 lck_rw_done(&mp->mnt_rwlock);
         }
+       
         if (mntalloc) {
+               if (mp->mnt_crossref)
+                       mount_dropcrossref(mp, vp, 0);
+               else {
+                       mount_lock_destroy(mp);
  #if CONFIG_MACF
-               mac_mount_label_destroy(mp);
+                       mac_mount_label_destroy(mp);
  #endif
-               FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
+                       FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
+               }
         }
-
         if (vfsp_ref) {
                 mount_list_lock();
                 vfsp->vfc_refcount--;
                 mount_list_unlock();
         }
-       vnode_put(vp);
-       vnode_put(pvp);
-       nameidone(&nd);
  
         return(error);
  }
  
-void
-enablequotas(struct mount *mp, vfs_context_t ctx)
+/* 
+ * Flush in-core data, check for competing mount attempts,
+ * and set VMOUNT
+ */
+int
+prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
  {
-       struct nameidata qnd;
-       int type;
-       char qfpath[MAXPATHLEN];
-       const char *qfname = QUOTAFILENAME;
-       const char *qfopsname = QUOTAOPSNAME;
-       const char *qfextension[] = INITQFNAMES;
-
-       /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
-       if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
-               return;
-       }
-       /* 
-        * Enable filesystem disk quotas if necessary.
-        * We ignore errors as this should not interfere with final mount
-        */
-       for (type=0; type < MAXQUOTAS; type++) {
-               snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
-               NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(qfpath), ctx);
-               if (namei(&qnd) != 0)
-                       continue;           /* option file to trigger quotas is not present */
-               vnode_put(qnd.ni_vp);
-               nameidone(&qnd);
-               snprintf(qfpath, sizeof(qfpath),  "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
+#if !CONFIG_MACF
+#pragma unused(cnp,fsname)
+#endif
+       struct vnode_attr va;
+       int error;
  
-               (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
+       if (!skip_auth) {
+               /*
+                * If the user is not root, ensure that they own the directory
+                * onto which we are attempting to mount.
+                */
+               VATTR_INIT(&va);
+               VATTR_WANTED(&va, va_uid);
+               if ((error = vnode_getattr(vp, &va, ctx)) ||
+                               (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
+                                (!vfs_context_issuser(ctx)))) { 
+                       error = EPERM;
+                       goto out;
+               }
         }
-       return;
-}
-
  
-static int
-checkdirs_callback(proc_t p, void * arg) 
-{
-       struct cdirargs * cdrp = (struct cdirargs * )arg;
-       vnode_t olddp = cdrp->olddp;
-       vnode_t newdp = cdrp->newdp;
-       struct filedesc *fdp;
-       vnode_t tvp;
-       vnode_t fdp_cvp;
-       vnode_t fdp_rvp;
-       int cdir_changed = 0;
-       int rdir_changed = 0;
+       if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
+               goto out;
  
-       /*
-        * XXX Also needs to iterate each thread in the process to see if it
-        * XXX is using a per-thread current working directory, and, if so,
-        * XXX update that as well.
-        */
+       if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
+               goto out;
  
-       proc_fdlock(p);
-       fdp = p->p_fd;
-       if (fdp == (struct filedesc *)0) {
-               proc_fdunlock(p);
-               return(PROC_RETURNED);
+       if (vp->v_type != VDIR) {
+               error = ENOTDIR;
+               goto out;
         }
-       fdp_cvp = fdp->fd_cdir;
-       fdp_rvp = fdp->fd_rdir;
-       proc_fdunlock(p);
  
-       if (fdp_cvp == olddp) {
-               vnode_ref(newdp);
-               tvp = fdp->fd_cdir;
-               fdp_cvp = newdp;
-               cdir_changed = 1;
-               vnode_rele(tvp);
-       }
-       if (fdp_rvp == olddp) {
-               vnode_ref(newdp);
-               tvp = fdp->fd_rdir;
-               fdp_rvp = newdp;
-               rdir_changed = 1;
-               vnode_rele(tvp);
-       }
-       if (cdir_changed || rdir_changed) {
-               proc_fdlock(p);
-               fdp->fd_cdir = fdp_cvp;
-               fdp->fd_rdir = fdp_rvp;
-               proc_fdunlock(p);
+       if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
+               error = EBUSY;
+               goto out;
         }
-       return(PROC_RETURNED);
+
+#if CONFIG_MACF
+       error = mac_mount_check_mount(ctx, vp,
+           cnp, fsname);
+       if (error != 0)
+               goto out;
+#endif
+
+       vnode_lock_spin(vp);
+       SET(vp->v_flag, VMOUNT);
+       vnode_unlock(vp);
+
+out:
+       return error;
  }
  
+#if CONFIG_IMGSRC_ACCESS
  
+#if DEBUG
+#define IMGSRC_DEBUG(args...) printf(args)
+#else
+#define IMGSRC_DEBUG(args...) do { } while(0)
+#endif 
  
-/*
- * Scan all active processes to see if any of them have a current
- * or root directory onto which the new filesystem has just been
- * mounted. If so, replace them with the new mount point.
- */
  static int
-checkdirs(vnode_t olddp, vfs_context_t ctx)
+authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
  {
-       vnode_t newdp;
-       vnode_t tvp;
-       int err;
-       struct cdirargs cdr;
-       struct uthread * uth = get_bsdthread_info(current_thread());
-
-       if (olddp->v_usecount == 1)
-               return(0);
-       if (uth != (struct uthread *)0)
-               uth->uu_notrigger = 1;
-       err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
-       if (uth != (struct uthread *)0)
-               uth->uu_notrigger = 0;
+       struct nameidata nd;
+       vnode_t vp, realdevvp;
+       mode_t accessmode;
+       int error;
  
-       if (err != 0) {
-#if DIAGNOSTIC
-               panic("mount: lost mount: error %d", err);
-#endif
-               return(err);
+       NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
+       if ( (error = namei(&nd)) ) {
+               IMGSRC_DEBUG("namei() failed with %d\n", error);
+               return error;
         }
  
-       cdr.olddp = olddp;
-       cdr.newdp = newdp;
-       /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
-       proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
+       vp = nd.ni_vp;
  
-       if (rootvnode == olddp) {
-               vnode_ref(newdp);
-               tvp = rootvnode;
-               rootvnode = newdp;
-               vnode_rele(tvp);
+       if (!vnode_isblk(vp)) {
+               IMGSRC_DEBUG("Not block device.\n");
+               error = ENOTBLK;
+               goto out;
         }
  
-       vnode_put(newdp);
-       return(0);
-}
-
-/*
- * Unmount a file system.
- *
- * Note: unmount takes a path to the vnode mounted on as argument,
- * not special file (as before).
- */
-/* ARGSUSED */
-int
-unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
-{
-       vnode_t vp;
-       struct mount *mp;
-       int error;
-       struct nameidata nd;
-       vfs_context_t ctx = vfs_context_current();
-
-       NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctx);
-       error = namei(&nd);
-       if (error)
-               return (error);
-       vp = nd.ni_vp;
-       mp = vp->v_mount;
-       nameidone(&nd);
+       realdevvp = mp->mnt_devvp;
+       if (realdevvp == NULLVP) {
+               IMGSRC_DEBUG("No device backs the mount.\n");
+               error = ENXIO;
+               goto out;
+       }
  
-#if CONFIG_MACF
-       error = mac_mount_check_umount(ctx, mp);
+       error = vnode_getwithref(realdevvp);
         if (error != 0) {
-               vnode_put(vp);
-               return (error);
+               IMGSRC_DEBUG("Coudn't get iocount on device.\n");
+               goto out;
         }
-#endif
+
+       if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
+               IMGSRC_DEBUG("Wrong dev_t.\n");
+               error = ENXIO;
+               goto out1;
+       }
+
+       strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
+
         /*
-        * Must be the root of the filesystem
+        * If mount by non-root, then verify that user has necessary
+        * permissions on the device.
          */
-       if ((vp->v_flag & VROOT) == 0) {
-               vnode_put(vp);
-               return (EINVAL);
+       if (!vfs_context_issuser(ctx)) {
+               accessmode = KAUTH_VNODE_READ_DATA;
+               if ((mp->mnt_flag & MNT_RDONLY) == 0)
+                       accessmode |= KAUTH_VNODE_WRITE_DATA;
+               if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
+                       IMGSRC_DEBUG("Access denied.\n");
+                       goto out1;
+               }
         }
-       mount_ref(mp, 0);
-       vnode_put(vp);
-       /* safedounmount consumes the mount ref */
-       return (safedounmount(mp, uap->flags, ctx));
-}
  
-int
-vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
-{
-       mount_t mp;
+       *devvpp = vp;
  
-       mp = mount_list_lookupby_fsid(fsid, 0, 1);
-       if (mp == (mount_t)0) {
-               return(ENOENT);
+out1:
+       vnode_put(realdevvp);
+out:
+       nameidone(&nd);
+       if (error) {
+               vnode_put(vp);
         }
-       mount_ref(mp, 0);
-       mount_iterdrop(mp);
-       /* safedounmount consumes the mount ref */
-       return(safedounmount(mp, flags, ctx));
-}
  
+       return error;
+}
  
  /*
- * The mount struct comes with a mount ref which will be consumed.
- * Do the actual file system unmount, prevent some common foot shooting.
+ * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
+ * and call checkdirs()
   */
-int
-safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
+static int
+place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
  {
         int error;
-       proc_t p = vfs_context_proc(ctx);
  
-       /*
-        * Only root, or the user that did the original mount is
-        * permitted to unmount this filesystem.
-        */
-       if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
-           (error = suser(kauth_cred_get(), &p->p_acflag)))
-               goto out;
-
-       /*
-        * Don't allow unmounting the root file system.
-        */
-       if (mp->mnt_flag & MNT_ROOTFS) {
-               error = EBUSY; /* the root is always busy */
-               goto out;
-       }
-
-       return (dounmount(mp, flags, 1, ctx));
-
-out:
-       mount_drop(mp, 0);
-       return(error);
-}
+       mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
  
-/*
- * Do the actual file system unmount.
- */
-int
-dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
-{
-       vnode_t coveredvp = (vnode_t)0;
-       int error;
-       int needwakeup = 0;
-       int forcedunmount = 0;
-       int lflags = 0;
-       struct vnode *devvp = NULLVP;
+       vnode_lock_spin(vp);
+       CLR(vp->v_flag, VMOUNT);
+       vp->v_mountedhere = mp;
+       vnode_unlock(vp);
  
-       if (flags & MNT_FORCE)
-               forcedunmount = 1;
-       mount_lock(mp);
-       /* XXX post jaguar fix LK_DRAIN - then clean this up */
-       if ((flags & MNT_FORCE)) {
-               mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
-               mp->mnt_lflag |= MNT_LFORCE;
-       }
-       if (mp->mnt_lflag & MNT_LUNMOUNT) {
-               mp->mnt_lflag |= MNT_LWAIT;
-               if(withref != 0)
-                       mount_drop(mp, 1);
-               msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
-               /*
-                * The prior unmount attempt has probably succeeded.
-                * Do not dereference mp here - returning EBUSY is safest.
-                */
-               return (EBUSY);
-       }
-       mp->mnt_kern_flag |= MNTK_UNMOUNT;
-       mp->mnt_lflag |= MNT_LUNMOUNT;
-       mp->mnt_flag &=~ MNT_ASYNC;
-       /*
-        * anyone currently in the fast path that
-        * trips over the cached rootvp will be
-        * dumped out and forced into the slow path
-        * to regenerate a new cached value
-        */
-       mp->mnt_realrootvp = NULLVP;
-       mount_unlock(mp);
- 
         /*
          * taking the name_cache_lock exclusively will
          * insure that everyone is out of the fast path who
@@ -1149,2418 +1246,3670 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
         mount_generation++;
         name_cache_unlock();
  
         mount_generation++;
         name_cache_unlock();
  
-
-       lck_rw_lock_exclusive(&mp->mnt_rwlock);
-       if (withref != 0)
-               mount_drop(mp, 0);
-#if CONFIG_FSE
-       fsevent_unmount(mp);  /* has to come first! */
-#endif
-       error = 0;
-       if (forcedunmount == 0) {
-               ubc_umount(mp); /* release cached vnodes */
-               if ((mp->mnt_flag & MNT_RDONLY) == 0) {
-                       error = VFS_SYNC(mp, MNT_WAIT, ctx);
-                       if (error) {
-                               mount_lock(mp);
-                               mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
-                               mp->mnt_lflag &= ~MNT_LUNMOUNT;
-                               mp->mnt_lflag &= ~MNT_LFORCE;
-                               goto out;
-                       }
-               }
-       }
-       
-       if (forcedunmount)
-               lflags |= FORCECLOSE;
-       error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
-       if ((forcedunmount == 0) && error) {
-               mount_lock(mp);
-               mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
-               mp->mnt_lflag &= ~MNT_LUNMOUNT;
-               mp->mnt_lflag &= ~MNT_LFORCE;
+       error = vnode_ref(vp);
+       if (error != 0) {
                 goto out;
         }
  
                 goto out;
         }
  
-       /* make sure there are no one in the mount iterations or lookup */
-       mount_iterdrain(mp);
-
-       error = VFS_UNMOUNT(mp, flags, ctx);
-       if (error) {
-               mount_iterreset(mp);
-               mount_lock(mp);
-               mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
-               mp->mnt_lflag &= ~MNT_LUNMOUNT;
-               mp->mnt_lflag &= ~MNT_LFORCE;
+       error = checkdirs(vp, ctx);
+       if (error != 0)  {
+               /* Unmount the filesystem as cdir/rdirs cannot be updated */
+               vnode_rele(vp);
                 goto out;
         }
  
                 goto out;
         }
  
-       /* increment the operations count */
-       if (!error)
-               OSAddAtomic(1, &vfs_nummntops);
-
-       if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
-               /* hold an io reference and drop the usecount before close */
-               devvp = mp->mnt_devvp;
-               vnode_getalways(devvp);
-               vnode_rele(devvp);
-               VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
-                       ctx);
-               vnode_clearmountedon(devvp);
-               vnode_put(devvp);
-       }
-       lck_rw_done(&mp->mnt_rwlock);
-       mount_list_remove(mp);
-       lck_rw_lock_exclusive(&mp->mnt_rwlock);
-       
-       /* mark the mount point hook in the vp but not drop the ref yet */
-       if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
-                       vnode_getwithref(coveredvp);
-                       vnode_lock_spin(coveredvp);
-                       coveredvp->v_mountedhere = (struct mount *)0;
-                       vnode_unlock(coveredvp);
-                       vnode_put(coveredvp);
+out:
+       if (error != 0) {
+               mp->mnt_vnodecovered = NULLVP;
         }
         }
+       return error;
+}
  
  
-       mount_list_lock();
-       mp->mnt_vtable->vfc_refcount--;
-       mount_list_unlock();
+static void
+undo_place_on_covered_vp(mount_t mp, vnode_t vp)
+{
+       vnode_rele(vp);
+       vnode_lock_spin(vp);
+       vp->v_mountedhere = (mount_t)NULL;
+       vnode_unlock(vp);
  
  
-       cache_purgevfs(mp);     /* remove cache entries for this file sys */
-       vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
-       mount_lock(mp);
-       mp->mnt_lflag |= MNT_LDEAD;
+       mp->mnt_vnodecovered = NULLVP;
+}
  
  
-       if (mp->mnt_lflag & MNT_LWAIT) {
-               /*
-                * do the wakeup here
-                * in case we block in mount_refdrain
-                * which will drop the mount lock
-                * and allow anyone blocked in vfs_busy
-                * to wakeup and see the LDEAD state
-                */
-               mp->mnt_lflag &= ~MNT_LWAIT;
-               wakeup((caddr_t)mp);
-       }
-       mount_refdrain(mp);
-out:
-       if (mp->mnt_lflag & MNT_LWAIT) {
-               mp->mnt_lflag &= ~MNT_LWAIT;
-               needwakeup = 1; 
+static int
+mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
+{
+       int error;
+
+       /* unmount in progress: return error */
+       mount_lock_spin(mp);
+       if (mp->mnt_lflag & MNT_LUNMOUNT) {
+               mount_unlock(mp);
+               return EBUSY;
         }
         mount_unlock(mp);
         }
         mount_unlock(mp);
-       lck_rw_done(&mp->mnt_rwlock);
+       lck_rw_lock_exclusive(&mp->mnt_rwlock);
  
  
-       if (needwakeup)
-               wakeup((caddr_t)mp);
-       if (!error) {
-               if ((coveredvp != NULLVP)) {
-                       vnode_t pvp;
+       /*
+        * We only allow the filesystem to be reloaded if it
+        * is currently mounted read-only.
+        */
+       if ((flags & MNT_RELOAD) &&
+                       ((mp->mnt_flag & MNT_RDONLY) == 0)) {
+               error = ENOTSUP;
+               goto out;
+       }
  
  
-                       vnode_getwithref(coveredvp);
-                       pvp = vnode_getparent(coveredvp);
-                       vnode_rele(coveredvp);
-                       vnode_lock_spin(coveredvp);
-                       if(mp->mnt_crossref == 0) {
-                               vnode_unlock(coveredvp);
-                               mount_lock_destroy(mp);
+       /*
+        * Only root, or the user that did the original mount is
+        * permitted to update it.
+        */
+       if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
+                       (!vfs_context_issuser(ctx))) { 
+               error = EPERM;
+               goto out;
+       }
  #if CONFIG_MACF
  #if CONFIG_MACF
-                               mac_mount_label_destroy(mp);
+       error = mac_mount_check_remount(ctx, mp);
+       if (error != 0) {
+               goto out;
+       }
  #endif
  #endif
-                               FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
-                       }  else {
-                               coveredvp->v_lflag |= VL_MOUNTDEAD;
-                               vnode_unlock(coveredvp);
-                       }
-                       vnode_put(coveredvp);
  
  
-                       if (pvp) {
-                               lock_vnode_and_post(pvp, NOTE_WRITE);
-                               vnode_put(pvp);
-                       }
-               } else if (mp->mnt_flag & MNT_ROOTFS) {
-                               mount_lock_destroy(mp);
-#if CONFIG_MACF
-                               mac_mount_label_destroy(mp);
-#endif
-                               FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
-               } else
-                       panic("dounmount: no coveredvp");
+out:
+       if (error) {
+               lck_rw_done(&mp->mnt_rwlock);
         }
         }
-       return (error);
+
+       return error;
  }
  
  }
  
-void
-mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
+static void 
+mount_end_update(mount_t mp)
  {
  {
-               vnode_lock(dp);
-               mp->mnt_crossref--;
-               if (mp->mnt_crossref < 0)
-                       panic("mount cross refs -ve");
-               if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) {
-                       dp->v_lflag &= ~VL_MOUNTDEAD;
-                       if (need_put)
-                               vnode_put_locked(dp);
-                       vnode_unlock(dp);
-                       mount_lock_destroy(mp);
-#if CONFIG_MACF
-                       mac_mount_label_destroy(mp);
-#endif
-                       FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
-                       return;
-               }
-               if (need_put)
-                       vnode_put_locked(dp);
-               vnode_unlock(dp);
+       lck_rw_done(&mp->mnt_rwlock);
  }
  
  }
  
+static int
+get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
+{
+       vnode_t vp;
  
  
-/*
- * Sync each mounted filesystem.
- */
-#if DIAGNOSTIC
-int syncprt = 0;
-struct ctldebug debug0 = { "syncprt", &syncprt };
-#endif
+       if (height >= MAX_IMAGEBOOT_NESTING) {
+               return EINVAL;
+       }
  
  
-int print_vmpage_stat=0;
+       vp = imgsrc_rootvnodes[height];
+       if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
+               *rvpp = vp;
+               return 0;
+       } else {
+               return ENOENT;
+       }
+}
  
  
-static int 
-sync_callback(mount_t mp, void * arg)
+static int
+relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, 
+               const char *fsname, vfs_context_t ctx, 
+               boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
  {
  {
-       int asyncflag;
+       int error;
+       mount_t mp;
+       boolean_t placed = FALSE;
+       vnode_t devvp = NULLVP;
+       struct vfstable *vfsp;
+       user_addr_t devpath;
+       char *old_mntonname;
+       vnode_t rvp;
+       uint32_t height;
+       uint32_t flags;
  
  
-       if ((mp->mnt_flag & MNT_RDONLY) == 0) {
-                       asyncflag = mp->mnt_flag & MNT_ASYNC;
-                       mp->mnt_flag &= ~MNT_ASYNC;
-                       VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current());
-                       if (asyncflag)
-                               mp->mnt_flag |= MNT_ASYNC;
+       /* If we didn't imageboot, nothing to move */
+       if (imgsrc_rootvnodes[0] == NULLVP) {
+               return EINVAL;
         }
         }
-       return(VFS_RETURNED);
-}
  
  
+       /* Only root can do this */
+       if (!vfs_context_issuser(ctx)) {
+               return EPERM;
+       }
  
  
-#include <kern/clock.h>
+       IMGSRC_DEBUG("looking for root vnode.\n");
  
  
-clock_sec_t sync_wait_time = 0;
+       /*
+        * Get root vnode of filesystem we're moving.
+        */
+       if (by_index) {
+               if (is64bit) {
+                       struct user64_mnt_imgsrc_args mia64;
+                       error = copyin(fsmountargs, &mia64, sizeof(mia64));
+                       if (error != 0) {
+                               IMGSRC_DEBUG("Failed to copy in arguments.\n");
+                               return error;
+                       }
  
  
-/* ARGSUSED */
-int
-sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
-{
-       clock_nsec_t nsecs;
+                       height = mia64.mi_height;
+                       flags = mia64.mi_flags;
+                       devpath = mia64.mi_devpath;
+               } else {
+                       struct user32_mnt_imgsrc_args mia32;
+                       error = copyin(fsmountargs, &mia32, sizeof(mia32));
+                       if (error != 0) {
+                               IMGSRC_DEBUG("Failed to copy in arguments.\n");
+                               return error;
+                       }
+
+                       height = mia32.mi_height;
+                       flags = mia32.mi_flags;
+                       devpath = mia32.mi_devpath;
+               }
+       } else {
+               /*
+                * For binary compatibility--assumes one level of nesting.
+                */
+               if (is64bit) {
+                       if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
+                               return error;
+               } else {
+                       user32_addr_t tmp;
+                       if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
+                               return error;
  
  
-       vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
+                       /* munge into LP64 addr */
+                       devpath = CAST_USER_ADDR_T(tmp);
+               }
  
  
-       {
-               static fsid_t fsid = { { 0, 0 } };
-               
-               clock_get_calendar_microtime(&sync_wait_time, &nsecs);
-               vfs_event_signal(&fsid, VQ_SYNCEVENT, (intptr_t)NULL);  
-               wakeup((caddr_t)&sync_wait_time);
+               height = 0;
+               flags = 0;
         }
  
         }
  
-       {
-       if(print_vmpage_stat) {
-               vm_countdirtypages();
+       if (flags != 0) {
+               IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
+               return EINVAL;
         }
         }
-       }
-#if DIAGNOSTIC
-       if (syncprt)
-               vfs_bufstats();
-#endif /* DIAGNOSTIC */
-       return (0);
-}
  
  
-/*
- * Change filesystem quotas.
- */
-#if QUOTA
-static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval);
+       error = get_imgsrc_rootvnode(height, &rvp);
+       if (error != 0) {
+               IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
+               return error;
+       }
  
  
-int
-quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval)
-{
-       boolean_t funnel_state;
-       int error;
-       
-       funnel_state = thread_funnel_set(kernel_flock, TRUE);
-       error = quotactl_funneled(p, uap, retval);
-       thread_funnel_set(kernel_flock, funnel_state);
-       return(error);
-}
+       IMGSRC_DEBUG("got root vnode.\n");
  
  
-static int
-quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
-{
-       struct mount *mp;
-       int error, quota_cmd, quota_status;
-       caddr_t datap;
-       size_t fnamelen;
-       struct nameidata nd;
-       vfs_context_t ctx = vfs_context_current();
-       struct dqblk my_dqblk;
+       MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
  
  
-       AUDIT_ARG(uid, uap->uid);
-       AUDIT_ARG(cmd, uap->cmd);
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctx);
-       error = namei(&nd);
-       if (error)
-               return (error);
-       mp = nd.ni_vp->v_mount;
-       vnode_put(nd.ni_vp);
-       nameidone(&nd);
+       /* Can only move once */
+       mp = vnode_mount(rvp);
+       if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
+               IMGSRC_DEBUG("Already moved.\n");
+               error = EBUSY;
+               goto out0;
+       }
  
  
-       /* copyin any data we will need for downstream code */
-       quota_cmd = uap->cmd >> SUBCMDSHIFT;
+       IMGSRC_DEBUG("Starting updated.\n");
  
  
-       switch (quota_cmd) {
-       case Q_QUOTAON:
-               /* uap->arg specifies a file from which to take the quotas */
-               fnamelen = MAXPATHLEN;
-               datap = kalloc(MAXPATHLEN);
-               error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
-               break;
-       case Q_GETQUOTA:
-               /* uap->arg is a pointer to a dqblk structure. */
-               datap = (caddr_t) &my_dqblk;
-               break;
-       case Q_SETQUOTA:
-       case Q_SETUSE:
-               /* uap->arg is a pointer to a dqblk structure. */
-               datap = (caddr_t) &my_dqblk;
-               if (proc_is64bit(p)) {
-                       struct user_dqblk       my_dqblk64;
-                       error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
-                       if (error == 0) {
-                               munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
-                       }
-               }
-               else {
-                       error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
-               }
-               break;
-       case Q_QUOTASTAT:
-               /* uap->arg is a pointer to an integer */
-               datap = (caddr_t) &quota_status;
-               break;
-       default:
-               datap = NULL;
-               break;
-       } /* switch */
+       /* Get exclusive rwlock on mount, authorize update on mp */
+       error = mount_begin_update(mp , ctx, 0);
+       if (error != 0) {
+               IMGSRC_DEBUG("Starting updated failed with %d\n", error);
+               goto out0;
+       }
  
  
-       if (error == 0) {
-               error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
+       /* 
+        * It can only be moved once.  Flag is set under the rwlock,
+        * so we're now safe to proceed.
+        */
+       if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
+               IMGSRC_DEBUG("Already moved [2]\n");
+               goto out1;
         }
         }
+               
+       
+       IMGSRC_DEBUG("Preparing coveredvp.\n");
  
  
-       switch (quota_cmd) {
-       case Q_QUOTAON:
-               if (datap != NULL)
-                       kfree(datap, MAXPATHLEN);
-               break;
-       case Q_GETQUOTA:
-               /* uap->arg is a pointer to a dqblk structure we need to copy out to */
-               if (error == 0) {
-                       if (proc_is64bit(p)) {
-                               struct user_dqblk       my_dqblk64;
-                               munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
-                               error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
-                       }
-                       else {
-                               error = copyout(datap, uap->arg, sizeof (struct dqblk));
-                       }
-               }
-               break;
-       case Q_QUOTASTAT:
-               /* uap->arg is a pointer to an integer */
-               if (error == 0) {
-                       error = copyout(datap, uap->arg, sizeof(quota_status));
-               }
-               break;
-       default:
-               break;
-       } /* switch */
+       /* Mark covered vnode as mount in progress, authorize placing mount on top */
+       error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
+       if (error != 0) {
+               IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
+               goto out1;
+       }
+       
+       IMGSRC_DEBUG("Covered vp OK.\n");
  
  
-       return (error);
-}
-#else
-int
-quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
-{
-       return (EOPNOTSUPP);
-}
-#endif /* QUOTA */
+       /* Sanity check the name caller has provided */
+       vfsp = mp->mnt_vtable;
+       if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
+               IMGSRC_DEBUG("Wrong fs name.\n");
+               error = EINVAL;
+               goto out2;
+       }
  
  
-/*
- * Get filesystem statistics.
- *
- * Returns:    0                       Success
- *     namei:???
- *     vfs_update_vfsstat:???
- *     munge_statfs:EFAULT
- */
-/* ARGSUSED */
-int
-statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
-{
-       struct mount *mp;
-       struct vfsstatfs *sp;
-       int error;
-       struct nameidata nd;
-       vfs_context_t ctx = vfs_context_current();
-       vnode_t vp;
+       /* Check the device vnode and update mount-from name, for local filesystems */
+       if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
+               IMGSRC_DEBUG("Local, doing device validation.\n");
  
  
-       NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctx);
-       error = namei(&nd);
-       if (error)
-               return (error);
-       vp = nd.ni_vp;
-       mp = vp->v_mount;
-       sp = &mp->mnt_vfsstat;
-       nameidone(&nd);
+               if (devpath != USER_ADDR_NULL) {
+                       error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
+                       if (error) {
+                               IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
+                               goto out2;
+                       }
  
  
-       error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
-       vnode_put(vp);
-       if (error != 0) 
-               return (error);
+                       vnode_put(devvp);
+               }
+       }
  
  
-       error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
-       return (error);
-}
+       /* 
+        * Place mp on top of vnode, ref the vnode,  call checkdirs(),
+        * and increment the name cache's mount generation 
+        */
  
  
-/*
- * Get filesystem statistics.
- */
-/* ARGSUSED */
-int
-fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
-{
-       vnode_t vp;
-       struct mount *mp;
-       struct vfsstatfs *sp;
-       int error;
+       IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
+       error = place_mount_and_checkdirs(mp, vp, ctx);
+       if (error != 0) {
+               goto out2;
+       }
  
  
-       AUDIT_ARG(fd, uap->fd);
+       placed = TRUE;
  
  
-       if ( (error = file_vnode(uap->fd, &vp)) )
-               return (error);
+       strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
+       strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
  
  
-       AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+       /* Forbid future moves */
+       mount_lock(mp);
+       mp->mnt_kern_flag |= MNTK_HAS_MOVED;
+       mount_unlock(mp);
  
  
-       mp = vp->v_mount;
-       if (!mp) {
-               file_drop(uap->fd);
-               return (EBADF);
-       }
-       sp = &mp->mnt_vfsstat;
-       if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
-               file_drop(uap->fd);
-               return (error);
+       /* Finally, add to mount list, completely ready to go */
+       if (mount_list_add(mp) != 0) {
+               /*
+                * The system is shutting down trying to umount
+                * everything, so fail with a plausible errno.
+                */
+               error = EBUSY;
+               goto out3;
         }
         }
-       file_drop(uap->fd);
  
  
-       error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
+       mount_end_update(mp);
+       vnode_put(rvp);
+       FREE(old_mntonname, M_TEMP);
  
  
-       return (error);
-}
+       vfs_notify_mount(pvp);
  
  
-/* 
- * Common routine to handle copying of statfs64 data to user space 
- */
-static int 
-statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
-{
-       int error;
-       struct statfs64 sfs;
-       
-       bzero(&sfs, sizeof(sfs));
+       return 0;
+out3:
+       strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
  
  
-       sfs.f_bsize = sfsp->f_bsize;
-       sfs.f_iosize = (int32_t)sfsp->f_iosize;
-       sfs.f_blocks = sfsp->f_blocks;
-       sfs.f_bfree = sfsp->f_bfree;
-       sfs.f_bavail = sfsp->f_bavail;
-       sfs.f_files = sfsp->f_files;
-       sfs.f_ffree = sfsp->f_ffree;
-       sfs.f_fsid = sfsp->f_fsid;
-       sfs.f_owner = sfsp->f_owner;
-       sfs.f_type = mp->mnt_vtable->vfc_typenum;
-       sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
-       sfs.f_fssubtype = sfsp->f_fssubtype;
-       strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
-       strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
-       strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
+       mount_lock(mp);
+       mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
+       mount_unlock(mp);
  
  
-       error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
+out2:
+       /* 
+        * Placing the mp on the vnode clears VMOUNT,
+        * so cleanup is different after that point 
+        */
+       if (placed) {
+               /* Rele the vp, clear VMOUNT and v_mountedhere */
+               undo_place_on_covered_vp(mp, vp);
+       } else {
+               vnode_lock_spin(vp);
+               CLR(vp->v_flag, VMOUNT);
+               vnode_unlock(vp);
+       }
+out1:
+       mount_end_update(mp);
  
  
-       return(error);
+out0:
+       vnode_put(rvp);
+       FREE(old_mntonname, M_TEMP);
+       return error;
  }
  
  }
  
-/* 
- * Get file system statistics in 64-bit mode 
- */
-int
-statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
-{
-       struct mount *mp;
-       struct vfsstatfs *sp;
-       int error;
-       struct nameidata nd;
-       vfs_context_t ctxp = vfs_context_current();
-       vnode_t vp;
+#endif /* CONFIG_IMGSRC_ACCESS */
  
  
-       NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctxp);
-       error = namei(&nd);
-       if (error)
-               return (error);
-       vp = nd.ni_vp;
-       mp = vp->v_mount;
-       sp = &mp->mnt_vfsstat;
-       nameidone(&nd);
-
-       error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
-       vnode_put(vp);
-       if (error != 0) 
-               return (error);
+void
+enablequotas(struct mount *mp, vfs_context_t ctx)
+{
+       struct nameidata qnd;
+       int type;
+       char qfpath[MAXPATHLEN];
+       const char *qfname = QUOTAFILENAME;
+       const char *qfopsname = QUOTAOPSNAME;
+       const char *qfextension[] = INITQFNAMES;
  
  
-       error = statfs64_common(mp, sp, uap->buf);
+       /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
+       if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
+               return;
+       }
+       /* 
+        * Enable filesystem disk quotas if necessary.
+        * We ignore errors as this should not interfere with final mount
+        */
+       for (type=0; type < MAXQUOTAS; type++) {
+               snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
+               NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
+                      CAST_USER_ADDR_T(qfpath), ctx);
+               if (namei(&qnd) != 0)
+                       continue;           /* option file to trigger quotas is not present */
+               vnode_put(qnd.ni_vp);
+               nameidone(&qnd);
+               snprintf(qfpath, sizeof(qfpath),  "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
  
  
-       return (error);
+               (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
+       }
+       return;
  }
  
  }
  
-/* 
- * Get file system statistics in 64-bit mode 
- */
-int
-fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
-{
-       struct vnode *vp;
-       struct mount *mp;
-       struct vfsstatfs *sp;
-       int error;
  
  
-       AUDIT_ARG(fd, uap->fd);
+static int
+checkdirs_callback(proc_t p, void * arg) 
+{
+       struct cdirargs * cdrp = (struct cdirargs * )arg;
+       vnode_t olddp = cdrp->olddp;
+       vnode_t newdp = cdrp->newdp;
+       struct filedesc *fdp;
+       vnode_t tvp;
+       vnode_t fdp_cvp;
+       vnode_t fdp_rvp;
+       int cdir_changed = 0;
+       int rdir_changed = 0;
  
  
-       if ( (error = file_vnode(uap->fd, &vp)) )
-               return (error);
+       /*
+        * XXX Also needs to iterate each thread in the process to see if it
+        * XXX is using a per-thread current working directory, and, if so,
+        * XXX update that as well.
+        */
  
  
-       AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+       proc_fdlock(p);
+       fdp = p->p_fd;
+       if (fdp == (struct filedesc *)0) {
+               proc_fdunlock(p);
+               return(PROC_RETURNED);
+       }
+       fdp_cvp = fdp->fd_cdir;
+       fdp_rvp = fdp->fd_rdir;
+       proc_fdunlock(p);
  
  
-       mp = vp->v_mount;
-       if (!mp) {
-               file_drop(uap->fd);
-               return (EBADF);
+       if (fdp_cvp == olddp) {
+               vnode_ref(newdp);
+               tvp = fdp->fd_cdir;
+               fdp_cvp = newdp;
+               cdir_changed = 1;
+               vnode_rele(tvp);
         }
         }
-       sp = &mp->mnt_vfsstat;
-       if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
-               file_drop(uap->fd);
-               return (error);
+       if (fdp_rvp == olddp) {
+               vnode_ref(newdp);
+               tvp = fdp->fd_rdir;
+               fdp_rvp = newdp;
+               rdir_changed = 1;
+               vnode_rele(tvp);
         }
         }
-       file_drop(uap->fd);
-
-       error = statfs64_common(mp, sp, uap->buf);
-
-       return (error);
+       if (cdir_changed || rdir_changed) {
+               proc_fdlock(p);
+               fdp->fd_cdir = fdp_cvp;
+               fdp->fd_rdir = fdp_rvp;
+               proc_fdunlock(p);
+       }
+       return(PROC_RETURNED);
  }
  
  }
  
-struct getfsstat_struct {
-       user_addr_t     sfsp;
-       user_addr_t     *mp;
-       int             count;
-       int             maxcount;
-       int             flags;
-       int             error;
-};
  
  
  
  
+/*
+ * Scan all active processes to see if any of them have a current
+ * or root directory onto which the new filesystem has just been
+ * mounted. If so, replace them with the new mount point.
+ */
  static int
  static int
-getfsstat_callback(mount_t mp, void * arg)
+checkdirs(vnode_t olddp, vfs_context_t ctx)
  {
  {
-       
-       struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
-       struct vfsstatfs *sp;
-       int error, my_size;
-       vfs_context_t ctx = vfs_context_current();
-
-       if (fstp->sfsp && fstp->count < fstp->maxcount) {
-               sp = &mp->mnt_vfsstat;
-               /*
-                * If MNT_NOWAIT is specified, do not refresh the
-                * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
-                */
-               if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
-                       (error = vfs_update_vfsstat(mp, ctx,
-                           VFS_USER_EVENT))) {
-                       KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
-                       return(VFS_RETURNED);
-               }
+       vnode_t newdp;
+       vnode_t tvp;
+       int err;
+       struct cdirargs cdr;
  
  
-               /*
-                * Need to handle LP64 version of struct statfs
-                */
-               error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
-               if (error) {
-                       fstp->error = error;
-                       return(VFS_RETURNED_DONE);
-               }
-               fstp->sfsp += my_size;
+       if (olddp->v_usecount == 1)
+               return(0);
+       err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
  
  
-               if (fstp->mp) {
-                       error = mac_mount_label_get(mp, *fstp->mp);
-                       if (error) {
-                               fstp->error = error;
-                               return(VFS_RETURNED_DONE);
-                       }
-                       fstp->mp++;
-               }
+       if (err != 0) {
+#if DIAGNOSTIC
+               panic("mount: lost mount: error %d", err);
+#endif
+               return(err);
         }
         }
-       fstp->count++;
-       return(VFS_RETURNED);
-}
  
  
-/*
- * Get statistics on all filesystems.
- */
-int
-getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
-{
-       struct __mac_getfsstat_args muap;
+       cdr.olddp = olddp;
+       cdr.newdp = newdp;
+       /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
+       proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
  
  
-       muap.buf = uap->buf;
-       muap.bufsize = uap->bufsize;
-       muap.mac = USER_ADDR_NULL;
-       muap.macsize = 0;
-       muap.flags = uap->flags;
+       if (rootvnode == olddp) {
+               vnode_ref(newdp);
+               tvp = rootvnode;
+               rootvnode = newdp;
+               vnode_rele(tvp);
+       }
  
  
-       return (__mac_getfsstat(p, &muap, retval));
+       vnode_put(newdp);
+       return(0);
  }
  
  /*
  }
  
  /*
- * __mac_getfsstat: Get MAC-related file system statistics
- *
- * Parameters:    p                        (ignored)
- *                uap                      User argument descriptor (see below)
- *                retval                   Count of file system statistics (N stats)  
- *
- * Indirect:      uap->bufsize             Buffer size
- *                uap->macsize             MAC info size
- *                uap->buf                 Buffer where information will be returned
- *                uap->mac                 MAC info
- *                uap->flags               File system flags
- *                
- *
- * Returns:        0                       Success
- *                !0                       Not success
+ * Unmount a file system.
   *
   *
+ * Note: unmount takes a path to the vnode mounted on as its argument,
+ * not a special file (as before).
   */
   */
+/* ARGSUSED */
  int
  int
-__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
+unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
  {
  {
-       user_addr_t sfsp;
-       user_addr_t *mp;
-       size_t count, maxcount, bufsize, macsize;
-       struct getfsstat_struct fst;
-
-       bufsize = (size_t) uap->bufsize;
-       macsize = (size_t) uap->macsize;
-
-       if (IS_64BIT_PROCESS(p)) {
-               maxcount = bufsize / sizeof(struct user64_statfs);
-       }
-       else {
-               maxcount = bufsize / sizeof(struct user32_statfs);
-       }
-       sfsp = uap->buf;
-       count = 0;
+       vnode_t vp;
+       struct mount *mp;
+       int error;
+       struct nameidata nd;
+       vfs_context_t ctx = vfs_context_current();
  
  
-       mp = NULL;
+       NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1, 
+               UIO_USERSPACE, uap->path, ctx);
+       error = namei(&nd);
+       if (error)
+               return (error);
+       vp = nd.ni_vp;
+       mp = vp->v_mount;
+       nameidone(&nd);
  
  #if CONFIG_MACF
  
  #if CONFIG_MACF
-       if (uap->mac != USER_ADDR_NULL) {
-               u_int32_t *mp0;
-               int error;
-               unsigned int i;
-
-               count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
-               if (count != maxcount)
-                       return (EINVAL);
-
-               /* Copy in the array */
-               MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
-               if (mp0 == NULL) {
-                       return (ENOMEM);
-               }
-
-               error = copyin(uap->mac, mp0, macsize);
-               if (error) {
-                       FREE(mp0, M_MACTEMP);
-                       return (error);
-               }
-
-               /* Normalize to an array of user_addr_t */
-               MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
-               if (mp == NULL) {
-                       FREE(mp0, M_MACTEMP);
-                       return (ENOMEM);
-               }
-
-               for (i = 0; i < count; i++) {
-                       if (IS_64BIT_PROCESS(p))
-                               mp[i] = ((user_addr_t *)mp0)[i];
-                       else
-                               mp[i] = (user_addr_t)mp0[i];
-               }
-               FREE(mp0, M_MACTEMP);
+       error = mac_mount_check_umount(ctx, mp);
+       if (error != 0) {
+               vnode_put(vp);
+               return (error);
         }
  #endif
         }
  #endif
-
-
-       fst.sfsp = sfsp;
-       fst.mp = mp;
-       fst.flags = uap->flags;
-       fst.count = 0;
-       fst.error = 0;
-       fst.maxcount = maxcount;
-
-       
-       vfs_iterate(0, getfsstat_callback, &fst);
-
-       if (mp)
-               FREE(mp, M_MACTEMP);
-
-       if (fst.error ) {
-               KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
-               return(fst.error);
+       /*
+        * Must be the root of the filesystem
+        */
+       if ((vp->v_flag & VROOT) == 0) {
+               vnode_put(vp);
+               return (EINVAL);
         }
         }
-
-       if (fst.sfsp && fst.count > fst.maxcount)
-               *retval = fst.maxcount;
-       else
-               *retval = fst.count;
-       return (0);
+       mount_ref(mp, 0);
+       vnode_put(vp);
+       /* safedounmount consumes the mount ref */
+       return (safedounmount(mp, uap->flags, ctx));
  }
  
  }
  
-static int
-getfsstat64_callback(mount_t mp, void * arg)
+int
+vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
  {
  {
-       struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
-       struct vfsstatfs *sp;
-       int error;
-
-       if (fstp->sfsp && fstp->count < fstp->maxcount) {
-               sp = &mp->mnt_vfsstat;
-               /*
-                * If MNT_NOWAIT is specified, do not refresh the fsstat
-                * cache. MNT_WAIT overrides MNT_NOWAIT.
-                *
-                * We treat MNT_DWAIT as MNT_WAIT for all instances of
-                * getfsstat, since the constants are out of the same
-                * namespace.
-                */
-               if (((fstp->flags & MNT_NOWAIT) == 0 ||
-                    (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
-                   (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
-                       KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
-                       return(VFS_RETURNED);
-               }
+       mount_t mp;
  
  
-               error = statfs64_common(mp, sp, fstp->sfsp);
-               if (error) {
-                       fstp->error = error;
-                       return(VFS_RETURNED_DONE);
-               }
-               fstp->sfsp += sizeof(struct statfs64);
+       mp = mount_list_lookupby_fsid(fsid, 0, 1);
+       if (mp == (mount_t)0) {
+               return(ENOENT);
         }
         }
-       fstp->count++;
-       return(VFS_RETURNED);
+       mount_ref(mp, 0);
+       mount_iterdrop(mp);
+       /* safedounmount consumes the mount ref */
+       return(safedounmount(mp, flags, ctx));
  }
  
  }
  
+
  /*
  /*
- * Get statistics on all file systems in 64 bit mode.
+ * The mount struct comes with a mount ref which will be consumed.
+ * Do the actual file system unmount, after guarding against some common
+ * foot-shooting.
   */
  int
   */
  int
-getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
+safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
  {
  {
-       user_addr_t sfsp;
-       int count, maxcount;
-       struct getfsstat_struct fst;
-
-       maxcount = uap->bufsize / sizeof(struct statfs64);
-
-       sfsp = uap->buf;
-       count = 0;
+       int error;
+       proc_t p = vfs_context_proc(ctx);
  
  
-       fst.sfsp = sfsp;
-       fst.flags = uap->flags;
-       fst.count = 0;
-       fst.error = 0;
-       fst.maxcount = maxcount;
+       /*
+        * If the file system is not responding and MNT_NOBLOCK
+        * is set and not a forced unmount then return EBUSY.
+        */
+       if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
+               (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
+               error = EBUSY;
+               goto out;
+       }
  
  
-       vfs_iterate(0, getfsstat64_callback, &fst);
+       /*
+        * Skip authorization if the mount is tagged as permissive and 
+        * this is not a forced-unmount attempt.
+        */
+       if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
+               /*
+                * Only root, or the user that did the original mount is
+                * permitted to unmount this filesystem.
+                */
+               if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
+                               (error = suser(kauth_cred_get(), &p->p_acflag)))
+                       goto out;
+       }
+       /*
+        * Don't allow unmounting the root file system.
+        */
+       if (mp->mnt_flag & MNT_ROOTFS) {
+               error = EBUSY; /* the root is always busy */
+               goto out;
+       }
  
  
-       if (fst.error ) {
-               KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
-               return(fst.error);
+#ifdef CONFIG_IMGSRC_ACCESS
+       if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
+               error = EBUSY;
+               goto out;
         }
         }
+#endif /* CONFIG_IMGSRC_ACCESS */
  
  
-       if (fst.sfsp && fst.count > fst.maxcount)
-               *retval = fst.maxcount;
-       else
-               *retval = fst.count;
+       return (dounmount(mp, flags, 1, ctx));
  
  
-       return (0);
+out:
+       mount_drop(mp, 0);
+       return(error);
  }
  
  /*
  }
  
  /*
- * Change current working directory to a given file descriptor.
+ * Do the actual file system unmount.
   */
   */
-/* ARGSUSED */
-static int
-common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
+int
+dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
  {
  {
-       struct filedesc *fdp = p->p_fd;
-       vnode_t vp;
-       vnode_t tdp;
-       vnode_t tvp;
-       struct mount *mp;
+       vnode_t coveredvp = (vnode_t)0;
         int error;
         int error;
-       vfs_context_t ctx = vfs_context_current();
+       int needwakeup = 0;
+       int forcedunmount = 0;
+       int lflags = 0;
+       struct vnode *devvp = NULLVP;
+#if CONFIG_TRIGGERS
+       proc_t p = vfs_context_proc(ctx);
+       int did_vflush = 0;
+       int pflags_save = 0;
+#endif /* CONFIG_TRIGGERS */
  
  
-       AUDIT_ARG(fd, uap->fd);
-       if (per_thread && uap->fd == -1) {
-               /*
-                * Switching back from per-thread to per process CWD; verify we
-                * in fact have one before proceeding.  The only success case
-                * for this code path is to return 0 preemptively after zapping
-                * the thread structure contents.
-                */
-               thread_t th = vfs_context_thread(ctx);
-               if (th) {
-                       uthread_t uth = get_bsdthread_info(th);
-                       tvp = uth->uu_cdir;
-                       uth->uu_cdir = NULLVP;
-                       if (tvp != NULLVP) {
-                               vnode_rele(tvp);
-                               return (0);
-                       }
-               }
-               return (EBADF);
-       }
+       mount_lock(mp);
  
  
-       if ( (error = file_vnode(uap->fd, &vp)) )
-               return(error);
-       if ( (error = vnode_getwithref(vp)) ) {
-               file_drop(uap->fd);
-               return(error);
+       /*
+        * If an unmount is already in progress, just return EBUSY.
+        * Even a forced unmount cannot override.
+        */
+       if (mp->mnt_lflag & MNT_LUNMOUNT) {
+               if (withref != 0)
+                       mount_drop(mp, 1);
+               mount_unlock(mp);
+               return (EBUSY);
         }
  
         }
  
-       AUDIT_ARG(vnpath, vp, ARG_VNODE1);
-
-       if (vp->v_type != VDIR) {
-               error = ENOTDIR;
-               goto out;
+       if (flags & MNT_FORCE) {
+               forcedunmount = 1;
+               mp->mnt_lflag |= MNT_LFORCE;
         }
  
         }
  
-#if CONFIG_MACF
-       error = mac_vnode_check_chdir(ctx, vp);
-       if (error)
-               goto out;
+#if CONFIG_TRIGGERS
+       if (flags & MNT_NOBLOCK && p != kernproc)
+               pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
  #endif
  #endif
-       error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
-       if (error)
-               goto out;
  
  
-       while (!error && (mp = vp->v_mountedhere) != NULL) {
-               if (vfs_busy(mp, LK_NOWAIT)) {
-                       error = EACCES;
-                       goto out;
-               }
-               error = VFS_ROOT(mp, &tdp, ctx);
-               vfs_unbusy(mp);
-               if (error)
-                       break;
-               vnode_put(vp);
-               vp = tdp;
+       mp->mnt_kern_flag |= MNTK_UNMOUNT;
+       mp->mnt_lflag |= MNT_LUNMOUNT;
+       mp->mnt_flag &=~ MNT_ASYNC;
+       /*
+        * anyone currently in the fast path that
+        * trips over the cached rootvp will be
+        * dumped out and forced into the slow path
+        * to regenerate a new cached value
+        */
+       mp->mnt_realrootvp = NULLVP;
+       mount_unlock(mp);
+ 
+       if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
+               /*
+                * Force unmount any mounts in this filesystem.
+                * If any unmounts fail - just leave them dangling.
+                * Avoids recursion.
+                */
+               (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
         }
         }
-       if (error)
+
+       /*
+        * taking the name_cache_lock exclusively will
+        * ensure that everyone is out of the fast path who
+        * might be trying to use a now stale copy of
+        * vp->v_mountedhere->mnt_realrootvp
+        * bumping mount_generation causes the cached values
+        * to be invalidated
+        */
+       name_cache_lock();
+       mount_generation++;
+       name_cache_unlock();
+
+
+       lck_rw_lock_exclusive(&mp->mnt_rwlock);
+       if (withref != 0)
+               mount_drop(mp, 0);
+#if CONFIG_FSE
+       fsevent_unmount(mp);  /* has to come first! */
+#endif
+       error = 0;
+       if (forcedunmount == 0) {
+               ubc_umount(mp); /* release cached vnodes */
+               if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+                       error = VFS_SYNC(mp, MNT_WAIT, ctx);
+                       if (error) {
+                               mount_lock(mp);
+                               mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
+                               mp->mnt_lflag &= ~MNT_LUNMOUNT;
+                               mp->mnt_lflag &= ~MNT_LFORCE;
+                               goto out;
+                       }
+               }
+       }
+
+#if CONFIG_TRIGGERS
+       vfs_nested_trigger_unmounts(mp, flags, ctx);
+       did_vflush = 1;
+#endif 
+       if (forcedunmount)
+               lflags |= FORCECLOSE;
+       error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
+       if ((forcedunmount == 0) && error) {
+               mount_lock(mp);
+               mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
+               mp->mnt_lflag &= ~MNT_LUNMOUNT;
+               mp->mnt_lflag &= ~MNT_LFORCE;
                 goto out;
                 goto out;
-       if ( (error = vnode_ref(vp)) )
-               goto out;
-       vnode_put(vp);
+       }
  
  
-       if (per_thread) {
-               thread_t th = vfs_context_thread(ctx);
-               if (th) {
-                       uthread_t uth = get_bsdthread_info(th);
-                       tvp = uth->uu_cdir;
-                       uth->uu_cdir = vp;
-                       OSBitOrAtomic(P_THCWD, &p->p_flag);
-               } else {
-                       vnode_rele(vp);
-                       return (ENOENT);
+       /* make sure no one is in the mount iterations or lookup */
+       mount_iterdrain(mp);
+
+       error = VFS_UNMOUNT(mp, flags, ctx);
+       if (error) {
+               mount_iterreset(mp);
+               mount_lock(mp);
+               mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
+               mp->mnt_lflag &= ~MNT_LUNMOUNT;
+               mp->mnt_lflag &= ~MNT_LFORCE;
+               goto out;
+       }
+
+       /* increment the operations count */
+       if (!error)
+               OSAddAtomic(1, &vfs_nummntops);
+
+       if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
+               /* hold an io reference and drop the usecount before close */
+               devvp = mp->mnt_devvp;
+               vnode_getalways(devvp);
+               vnode_rele(devvp);
+               VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
+                       ctx);
+               vnode_clearmountedon(devvp);
+               vnode_put(devvp);
+       }
+       lck_rw_done(&mp->mnt_rwlock);
+       mount_list_remove(mp);
+       lck_rw_lock_exclusive(&mp->mnt_rwlock);
+
+       /* mark the mount point hook in the vp but do not drop the ref yet */
+       if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
+               /*
+                * The covered vnode needs special handling. Trying to get an
+                * iocount must not block here as this may lead to deadlocks
+                * if the Filesystem to which the covered vnode belongs is
+                * undergoing forced unmounts. Since we hold a usecount, the
+                * vnode cannot be reused (it can, however, still be terminated)
+                */
+               vnode_getalways(coveredvp);
+               vnode_lock_spin(coveredvp);
+
+               mp->mnt_crossref++;
+               coveredvp->v_mountedhere = (struct mount *)0;
+               CLR(coveredvp->v_flag, VMOUNT);
+
+               vnode_unlock(coveredvp);
+               vnode_put(coveredvp);
+       }
+
+       mount_list_lock();
+       mp->mnt_vtable->vfc_refcount--;
+       mount_list_unlock();
+
+       cache_purgevfs(mp);     /* remove cache entries for this file sys */
+       vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
+       mount_lock(mp);
+       mp->mnt_lflag |= MNT_LDEAD;
+
+       if (mp->mnt_lflag & MNT_LWAIT) {
+               /*
+                * do the wakeup here
+                * in case we block in mount_refdrain
+                * which will drop the mount lock
+                * and allow anyone blocked in vfs_busy
+                * to wakeup and see the LDEAD state
+                */
+               mp->mnt_lflag &= ~MNT_LWAIT;
+               wakeup((caddr_t)mp);
+       }
+       mount_refdrain(mp);
+out:
+       if (mp->mnt_lflag & MNT_LWAIT) {
+               mp->mnt_lflag &= ~MNT_LWAIT;
+               needwakeup = 1; 
+       }
+
+#if CONFIG_TRIGGERS
+       if (flags & MNT_NOBLOCK && p != kernproc) {
+               // Restore P_NOREMOTEHANG bit to its previous value
+               if ((pflags_save & P_NOREMOTEHANG) == 0)
+                       OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
+       }
+
+       /* 
+        * Callback and context are set together under the mount lock, and
+        * never cleared, so we're safe to examine them here, drop the lock, 
+        * and call out.
+        */
+       if (mp->mnt_triggercallback != NULL) {
+               mount_unlock(mp);
+               if (error == 0) {
+                       mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
+               } else if (did_vflush) {
+                       mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
                 }
         } else {
                 }
         } else {
-               proc_fdlock(p);
-               tvp = fdp->fd_cdir;
-               fdp->fd_cdir = vp;
-               proc_fdunlock(p);
+               mount_unlock(mp);
         }
         }
+#else 
+       mount_unlock(mp);
+#endif /* CONFIG_TRIGGERS */
  
  
-       if (tvp)
-               vnode_rele(tvp);
-       file_drop(uap->fd);
+       lck_rw_done(&mp->mnt_rwlock);
  
  
-       return (0);
-out:
-       vnode_put(vp);
-       file_drop(uap->fd);
+       if (needwakeup)
+               wakeup((caddr_t)mp);
  
  
-       return(error);
-}
+       if (!error) {
+               if ((coveredvp != NULLVP)) {
+                       vnode_t pvp = NULLVP;
  
  
-int
-fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
-{
-       return common_fchdir(p, uap, 0);
-}
+                       /*
+                        * The covered vnode needs special handling. Trying to
+                        * get an iocount must not block here as this may lead
+                        * to deadlocks if the Filesystem to which the covered
+                        * vnode belongs is undergoing forced unmounts. Since we
+                        * hold a usecount, the  vnode cannot be reused
+                        * (it can, however, still be terminated).
+                        */
+                       vnode_getalways(coveredvp);
  
  
-int
-__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
-{
-       return common_fchdir(p, (void *)uap, 1);
+                       mount_dropcrossref(mp, coveredvp, 0);
+                       /*
+                        * We'll _try_ to detect if this really needs to be
+                        * done. The coveredvp can only be in termination (or
+                        * terminated) if the coveredvp's mount point is in a
+                        * forced unmount (or has been) since we still hold the
+                        * ref.
+                        */
+                       if (!vnode_isrecycled(coveredvp)) {
+                               pvp = vnode_getparent(coveredvp);
+#if CONFIG_TRIGGERS
+                               if (coveredvp->v_resolve) {
+                                       vnode_trigger_rearm(coveredvp, ctx);
+                               }
+#endif
+                       }
+
+                       vnode_rele(coveredvp);
+                       vnode_put(coveredvp);
+                       coveredvp = NULLVP;
+
+                       if (pvp) {
+                               lock_vnode_and_post(pvp, NOTE_WRITE);
+                               vnode_put(pvp);
+                       }
+               } else if (mp->mnt_flag & MNT_ROOTFS) {
+                               mount_lock_destroy(mp);
+#if CONFIG_MACF
+                               mac_mount_label_destroy(mp);
+#endif
+                               FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
+               } else
+                       panic("dounmount: no coveredvp");
+       }
+       return (error);
  }
  
  /*
  }
  
  /*
- * Change current working directory (".").
- *
- * Returns:    0                       Success
- *     change_dir:ENOTDIR
- *     change_dir:???
- *     vnode_ref:ENOENT                No such file or directory
+ * Unmount any mounts in this filesystem.
   */
   */
-/* ARGSUSED */
-static int
-common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
+void
+dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
  {
  {
-       struct filedesc *fdp = p->p_fd;
-       int error;
-       struct nameidata nd;
-       vnode_t tvp;
-       vfs_context_t ctx = vfs_context_current();
+       mount_t smp;
+       fsid_t *fsids, fsid;
+       int fsids_sz;
+       int count = 0, i, m = 0;
+       vnode_t vp;
  
  
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctx);
-       error = change_dir(&nd, ctx);
-       if (error)
-               return (error);
-       if ( (error = vnode_ref(nd.ni_vp)) ) {
-               vnode_put(nd.ni_vp);
-               return (error);
+       mount_list_lock();
+
+       // Get an array to hold the submounts' fsids.
+       TAILQ_FOREACH(smp, &mountlist, mnt_list)
+               count++;
+       fsids_sz = count * sizeof(fsid_t);
+       MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
+       if (fsids == NULL) {
+               mount_list_unlock();
+               goto out;
         }
         }
+       fsids[0] = mp->mnt_vfsstat.f_fsid;      // Prime the pump
+
         /*
         /*
-        * drop the iocount we picked up in change_dir
+        * Fill the array with submount fsids.
+        * Since mounts are always added to the tail of the mount list, the
+        * list is always in mount order.  
+        * For each mount check if the mounted-on vnode belongs to a
+        * mount that's already added to our array of mounts to be unmounted.
          */
          */
-       vnode_put(nd.ni_vp);
+       for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
+               vp = smp->mnt_vnodecovered;
+               if (vp == NULL)
+                       continue;
+               fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid;     // Underlying fsid
+               for (i = 0; i <= m; i++) {
+                       if (fsids[i].val[0] == fsid.val[0] &&
+                           fsids[i].val[1] == fsid.val[1]) {
+                               fsids[++m] = smp->mnt_vfsstat.f_fsid;
+                               break;
+                       }
+               }
+       }
+       mount_list_unlock();
  
  
-       if (per_thread) {
-               thread_t th = vfs_context_thread(ctx);
-               if (th) {
-                       uthread_t uth = get_bsdthread_info(th);
-                       tvp = uth->uu_cdir;
-                       uth->uu_cdir = nd.ni_vp;
-                       OSBitOrAtomic(P_THCWD, &p->p_flag);
-               } else {
-                       vnode_rele(nd.ni_vp);
-                       return (ENOENT);
+       // Unmount the submounts in reverse order. Ignore errors.
+       for (i = m; i > 0; i--) {
+               smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
+               if (smp) {
+                       mount_ref(smp, 0);
+                       mount_iterdrop(smp);
+                       (void) dounmount(smp, flags, 1, ctx);
                 }
                 }
-       } else {
-               proc_fdlock(p);
-               tvp = fdp->fd_cdir;
-               fdp->fd_cdir = nd.ni_vp;
-               proc_fdunlock(p);
         }
         }
+out:
+       if (fsids)
+               FREE(fsids, M_TEMP);
+}
  
  
-       if (tvp)
-               vnode_rele(tvp);
+void
+mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
+{
+       vnode_lock(dp);
+       mp->mnt_crossref--;
  
  
-       return (0);
+       if (mp->mnt_crossref < 0)
+               panic("mount cross refs -ve");
+
+       if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
+                       
+               if (need_put)
+                       vnode_put_locked(dp);
+               vnode_unlock(dp);
+
+               mount_lock_destroy(mp);
+#if CONFIG_MACF
+               mac_mount_label_destroy(mp);
+#endif
+               FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
+               return;
+       }
+       if (need_put)
+               vnode_put_locked(dp);
+       vnode_unlock(dp);
  }
  
  
  /*
  }
  
  
  /*
- * chdir
- *
- * Change current working directory (".") for the entire process
- *
- * Parameters:  p       Process requesting the call
- *             uap     User argument descriptor (see below)
- *             retval  (ignored)
- *
- * Indirect parameters:        uap->path       Directory path
- *
- * Returns:    0                       Success
- *             common_chdir: ENOTDIR
- *             common_chdir: ENOENT    No such file or directory
- *             common_chdir: ???
- *
+ * Sync each mounted filesystem.
   */
   */
+#if DIAGNOSTIC
+int syncprt = 0;
+#endif
+
+int print_vmpage_stat=0;
+int sync_timeout = 60;  // Sync time limit (sec)
+
+/*
+ * sync_callback: per-mount worker handed to vfs_iterate() by sync() and
+ * sync_thread().
+ *
+ * Parameters:  mp      Mount to flush
+ *              arg     Non-NULL selects MNT_WAIT, NULL selects MNT_NOWAIT
+ *                      (it is read below even though it is tagged __unused)
+ *
+ * Returns:     VFS_RETURNED in every case; the VFS_SYNC result is ignored
+ */
+static int 
+sync_callback(mount_t mp, __unused void *arg)
+{
+       /* read-only mounts are skipped entirely */
+       if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+               /* remember whether MNT_ASYNC was set so it can be put back */
+               int asyncflag = mp->mnt_flag & MNT_ASYNC;
+
+               /* MNT_ASYNC is cleared only for the duration of the VFS_SYNC call */
+               mp->mnt_flag &= ~MNT_ASYNC;
+               VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
+               if (asyncflag)
+                       mp->mnt_flag |= MNT_ASYNC;
+       }
+
+       return (VFS_RETURNED);
+}
+
+/* ARGSUSED */
  int
  int
-chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
+sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
  {
  {
-       return common_chdir(p, (void *)uap, 0);
+       vfs_iterate(LK_NOWAIT, sync_callback, NULL);
+
+       if (print_vmpage_stat) {
+               vm_countdirtypages();
+       }
+
+#if DIAGNOSTIC
+       if (syncprt)
+               vfs_bufstats();
+#endif /* DIAGNOSTIC */
+       return 0;
+}
+
+static void
+sync_thread(void *arg, __unused wait_result_t wr)
+{
+       int *timeout = (int *) arg;
+
+       vfs_iterate(LK_NOWAIT, sync_callback, NULL);
+
+       if (timeout)
+               wakeup((caddr_t) timeout);
+       if (print_vmpage_stat) {
+               vm_countdirtypages();
+       }
+
+#if DIAGNOSTIC
+       if (syncprt)
+               vfs_bufstats();
+#endif /* DIAGNOSTIC */
  }
  
  /*
  }
  
  /*
- * __pthread_chdir
- *
- * Change current working directory (".") for a single thread
- *
- * Parameters:  p       Process requesting the call
- *             uap     User argument descriptor (see below)
- *             retval  (ignored)
- *
- * Indirect parameters:        uap->path       Directory path
- *
- * Returns:    0                       Success
- *             common_chdir: ENOTDIR
- *             common_chdir: ENOENT    No such file or directory
- *             common_chdir: ???
- *
+ * Sync in a separate thread so we can time out if it blocks.
   */
   */
-int
-__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
+static int
+sync_async(int timeout)
  {
  {
-       return common_chdir(p, (void *)uap, 1);
+       thread_t thd;
+       int error;
+       struct timespec ts = {timeout, 0};
+
+       lck_mtx_lock(sync_mtx_lck);
+       if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
+               printf("sync_thread failed\n");
+               lck_mtx_unlock(sync_mtx_lck);
+               return (0);
+       }
+
+       error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
+       if (error) {
+               printf("sync timed out: %d sec\n", timeout);
+       }
+       thread_deallocate(thd);
+
+       return (0);
  }
  
  }
  
+/*
+ * An in-kernel sync for power management to call.
+ *
+ * Hands the work to sync_async() with the global sync_timeout as the time
+ * limit. The value returned by sync_async() is discarded, so this function
+ * always returns 0.
+ */
+__private_extern__ int
+sync_internal(void)
+{
+       (void) sync_async(sync_timeout);
+
+       return 0;
+} /* end of sync_internal call */
  
  /*
  
  /*
- * Change notion of root (``/'') directory.
+ * Change filesystem quotas.
   */
   */
-/* ARGSUSED */
+#if QUOTA
  int
  int
-chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
+quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
  {
  {
-       struct filedesc *fdp = p->p_fd;
-       int error;
+       struct mount *mp;
+       int error, quota_cmd, quota_status;
+       caddr_t datap;
+       size_t fnamelen;
         struct nameidata nd;
         struct nameidata nd;
-       vnode_t tvp;
         vfs_context_t ctx = vfs_context_current();
         vfs_context_t ctx = vfs_context_current();
+       struct dqblk my_dqblk;
  
  
-       if ((error = suser(kauth_cred_get(), &p->p_acflag)))
-               return (error);
-
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctx);
-       error = change_dir(&nd, ctx);
+       AUDIT_ARG(uid, uap->uid);
+       AUDIT_ARG(cmd, uap->cmd);
+       NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+              uap->path, ctx);
+       error = namei(&nd);
         if (error)
                 return (error);
         if (error)
                 return (error);
+       mp = nd.ni_vp->v_mount;
+       vnode_put(nd.ni_vp);
+       nameidone(&nd);
  
  
-#if CONFIG_MACF
-       error = mac_vnode_check_chroot(ctx, nd.ni_vp,
-           &nd.ni_cnd);
-       if (error) {
-               vnode_put(nd.ni_vp);
-               return (error);
-       }
-#endif
+       /* copyin any data we will need for downstream code */
+       quota_cmd = uap->cmd >> SUBCMDSHIFT;
  
  
-       if ( (error = vnode_ref(nd.ni_vp)) ) {
-               vnode_put(nd.ni_vp);
-               return (error);
-       }
-       vnode_put(nd.ni_vp);
+       switch (quota_cmd) {
+       case Q_QUOTAON:
+               /* uap->arg specifies a file from which to take the quotas */
+               fnamelen = MAXPATHLEN;
+               datap = kalloc(MAXPATHLEN);
+               error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
+               break;
+       case Q_GETQUOTA:
+               /* uap->arg is a pointer to a dqblk structure. */
+               datap = (caddr_t) &my_dqblk;
+               break;
+       case Q_SETQUOTA:
+       case Q_SETUSE:
+               /* uap->arg is a pointer to a dqblk structure. */
+               datap = (caddr_t) &my_dqblk;
+               if (proc_is64bit(p)) {
+                       struct user_dqblk       my_dqblk64;
+                       error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
+                       if (error == 0) {
+                               munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
+                       }
+               }
+               else {
+                       error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
+               }
+               break;
+       case Q_QUOTASTAT:
+               /* uap->arg is a pointer to an integer */
+               datap = (caddr_t) &quota_status;
+               break;
+       default:
+               datap = NULL;
+               break;
+       } /* switch */
  
  
-       proc_fdlock(p);
-       tvp = fdp->fd_rdir;
-       fdp->fd_rdir = nd.ni_vp;
-       fdp->fd_flags |= FD_CHROOT;
-       proc_fdunlock(p);
+       if (error == 0) {
+               error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
+       }
  
  
-       if (tvp != NULL)
-               vnode_rele(tvp);
+       switch (quota_cmd) {
+       case Q_QUOTAON:
+               if (datap != NULL)
+                       kfree(datap, MAXPATHLEN);
+               break;
+       case Q_GETQUOTA:
+               /* uap->arg is a pointer to a dqblk structure we need to copy out to */
+               if (error == 0) {
+                       if (proc_is64bit(p)) {
+                               struct user_dqblk       my_dqblk64 = {.dqb_bhardlimit = 0};
+                               munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
+                               error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
+                       }
+                       else {
+                               error = copyout(datap, uap->arg, sizeof (struct dqblk));
+                       }
+               }
+               break;
+       case Q_QUOTASTAT:
+               /* uap->arg is a pointer to an integer */
+               if (error == 0) {
+                       error = copyout(datap, uap->arg, sizeof(quota_status));
+               }
+               break;
+       default:
+               break;
+       } /* switch */
  
  
-       return (0);
+       return (error);
+}
+#else
+int
+quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
+{
+       return (EOPNOTSUPP);
  }
  }
+#endif /* QUOTA */
  
  /*
  
  /*
- * Common routine for chroot and chdir.
+ * Get filesystem statistics.
   *
   * Returns:    0                       Success
   *
   * Returns:    0                       Success
- *             ENOTDIR                 Not a directory
- *             namei:???               [anything namei can return]
- *             vnode_authorize:???     [anything vnode_authorize can return]
+ *     namei:???
+ *     vfs_update_vfsstat:???
+ *     munge_statfs:EFAULT
   */
   */
-static int
-change_dir(struct nameidata *ndp, vfs_context_t ctx)
+/* ARGSUSED */
+int
+statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
  {
  {
-       vnode_t vp;
+       struct mount *mp;
+       struct vfsstatfs *sp;
         int error;
         int error;
+       struct nameidata nd;
+       vfs_context_t ctx = vfs_context_current();
+       vnode_t vp;
  
  
-       if ((error = namei(ndp)))
-               return (error);
-       nameidone(ndp);
-       vp = ndp->ni_vp;
-
-       if (vp->v_type != VDIR) {
-               vnode_put(vp);
-               return (ENOTDIR);
-       }
-
-#if CONFIG_MACF
-       error = mac_vnode_check_chdir(ctx, vp);
-       if (error) {
-               vnode_put(vp);
+       NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1, 
+               UIO_USERSPACE, uap->path, ctx);
+       error = namei(&nd);
+       if (error)
                 return (error);
                 return (error);
-       }
-#endif
+       vp = nd.ni_vp;
+       mp = vp->v_mount;
+       sp = &mp->mnt_vfsstat;
+       nameidone(&nd);
  
  
-       error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
-       if (error) {
+       error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
+       if (error != 0) { 
                 vnode_put(vp);
                 return (error);
         }
  
                 vnode_put(vp);
                 return (error);
         }
  
+       error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
+       vnode_put(vp);
         return (error);
  }
  
  /*
         return (error);
  }
  
  /*
- * Check permissions, allocate an open file structure,
- * and call the device open routine if any.
- *
- * Returns:    0                       Success
- *             EINVAL
- *             EINTR
- *     falloc:ENFILE
- *     falloc:EMFILE
- *     falloc:ENOMEM
- *     vn_open_auth:???
- *     dupfdopen:???
- *     VNOP_ADVLOCK:???
- *     vnode_setsize:???
- *
- * XXX Need to implement uid, gid
+ * Get filesystem statistics for the filesystem holding the file referenced by an open file descriptor.
   */
   */
+/* ARGSUSED */
  int
  int
-open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, int32_t *retval)
+fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
  {
  {
-       proc_t p = vfs_context_proc(ctx);
-       uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
-       struct filedesc *fdp = p->p_fd;
-       struct fileproc *fp;
         vnode_t vp;
         vnode_t vp;
-       int flags, oflags;
-       struct fileproc *nfp;
-       int type, indx, error;
-       struct flock lf;
-       int no_controlling_tty = 0;
-       int deny_controlling_tty = 0;
-       struct session *sessp = SESSION_NULL;
-       struct vfs_context context = *vfs_context_current();    /* local copy */
-
-       oflags = uflags;
-
-       if ((oflags & O_ACCMODE) == O_ACCMODE)
-               return(EINVAL);
-       flags = FFLAGS(uflags);
+       struct mount *mp;
+       struct vfsstatfs *sp;
+       int error;
  
  
-       AUDIT_ARG(fflags, oflags);
-       AUDIT_ARG(mode, vap->va_mode);
+       AUDIT_ARG(fd, uap->fd);
  
  
-       if ( (error = falloc(p, &nfp, &indx, ctx)) ) {
+       if ( (error = file_vnode(uap->fd, &vp)) )
                 return (error);
                 return (error);
-       }
-       fp = nfp;
-       uu->uu_dupfd = -indx - 1;
  
  
-       if (!(p->p_flag & P_CONTROLT)) {
-               sessp = proc_session(p);
-               no_controlling_tty = 1;
-               /*
-                * If conditions would warrant getting a controlling tty if
-                * the device being opened is a tty (see ttyopen in tty.c),
-                * but the open flags deny it, set a flag in the session to
-                * prevent it.
-                */
-               if (SESS_LEADER(p, sessp) &&
-                   sessp->s_ttyvp == NULL &&
-                   (flags & O_NOCTTY)) {
-                       session_lock(sessp);
-                       sessp->s_flags |= S_NOCTTY;
-                       session_unlock(sessp);
-                       deny_controlling_tty = 1;
-               }
+       error = vnode_getwithref(vp);
+       if (error) {
+               file_drop(uap->fd);
+               return (error);
         }
  
         }
  
-       if ((error = vn_open_auth(ndp, &flags, vap))) {
-               if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){        /* XXX from fdopen */
-                       if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) {
-                               fp_drop(p, indx, NULL, 0);
-                               *retval = indx;
-                               if (deny_controlling_tty) {
-                                       session_lock(sessp);
-                                       sessp->s_flags &= ~S_NOCTTY;
-                                       session_unlock(sessp);
-                               }
-                               if (sessp != SESSION_NULL)
-                                       session_rele(sessp);
-                               return (0);
-                       }
-               }
-               if (error == ERESTART)
-                       error = EINTR;
-               fp_free(p, indx, fp);
+       AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
  
  
-               if (deny_controlling_tty) {
-                       session_lock(sessp);
-                       sessp->s_flags &= ~S_NOCTTY;
-                       session_unlock(sessp);
-               }
-               if (sessp != SESSION_NULL)
-                       session_rele(sessp);
-               return (error);
+       mp = vp->v_mount;
+       if (!mp) {
+               error = EBADF;
+               goto out;
+       }
+       sp = &mp->mnt_vfsstat;
+       if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
+               goto out;
         }
         }
-       uu->uu_dupfd = 0;
-       vp = ndp->ni_vp;
  
  
-       fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
-       fp->f_fglob->fg_type = DTYPE_VNODE;
-       fp->f_fglob->fg_ops = &vnops;
-       fp->f_fglob->fg_data = (caddr_t)vp;
+       error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
  
  
-       if (flags & (O_EXLOCK | O_SHLOCK)) {
-               lf.l_whence = SEEK_SET;
-               lf.l_start = 0;
-               lf.l_len = 0;
-               if (flags & O_EXLOCK)
-                       lf.l_type = F_WRLCK;
-               else
-                       lf.l_type = F_RDLCK;
-               type = F_FLOCK;
-               if ((flags & FNONBLOCK) == 0)
-                       type |= F_WAIT;
-#if CONFIG_MACF
-               error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
-                   F_SETLK, &lf);
-               if (error)
-                       goto bad;
-#endif
-               if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx)))
-                       goto bad;
-               fp->f_fglob->fg_flag |= FHASLOCK;
-       }
+out:
+       file_drop(uap->fd);
+       vnode_put(vp);
  
  
-       /* try to truncate by setting the size attribute */
-       if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
-               goto bad;
+       return (error);
+}
  
  
-       /*
-        * If the open flags denied the acquisition of a controlling tty,
-        * clear the flag in the session structure that prevented the lower
-        * level code from assigning one.
-        */
-       if (deny_controlling_tty) {
-               session_lock(sessp);
-               sessp->s_flags &= ~S_NOCTTY;
-               session_unlock(sessp);
-       }
+/* 
+ * Common routine to handle copying of statfs64 data to user space 
+ */
+static int 
+statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
+{
+       int error;
+       struct statfs64 sfs;
+       
+       bzero(&sfs, sizeof(sfs));
  
  
-       /*
-        * If a controlling tty was set by the tty line discipline, then we
-        * want to set the vp of the tty into the session structure.  We have
-        * a race here because we can't get to the vp for the tp in ttyopen,
-        * because it's not passed as a parameter in the open path.
-        */
-       if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
-               vnode_t ttyvp;
-               vnode_ref(vp);
-               session_lock(sessp);
-               ttyvp = sessp->s_ttyvp;
-               sessp->s_ttyvp = vp;
-               sessp->s_ttyvid = vnode_vid(vp);
-               session_unlock(sessp);
-               if (ttyvp != NULLVP)
-                       vnode_rele(ttyvp);
+       sfs.f_bsize = sfsp->f_bsize;
+       sfs.f_iosize = (int32_t)sfsp->f_iosize;
+       sfs.f_blocks = sfsp->f_blocks;
+       sfs.f_bfree = sfsp->f_bfree;
+       sfs.f_bavail = sfsp->f_bavail;
+       sfs.f_files = sfsp->f_files;
+       sfs.f_ffree = sfsp->f_ffree;
+       sfs.f_fsid = sfsp->f_fsid;
+       sfs.f_owner = sfsp->f_owner;
+       sfs.f_type = mp->mnt_vtable->vfc_typenum;
+       sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+       sfs.f_fssubtype = sfsp->f_fssubtype;
+       if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
+               strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
+       } else {
+               strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
         }
         }
+       strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
+       strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
  
  
-       vnode_put(vp);
+       error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
  
  
-       proc_fdlock(p);
-       procfdtbl_releasefd(p, indx, NULL);
-       fp_drop(p, indx, fp, 1);
-       proc_fdunlock(p);
+       return(error);
+}
  
  
-       *retval = indx;
+/* 
+ * Get file system statistics in 64-bit mode 
+ */
+int
+statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
+{
+       struct mount *mp;
+       struct vfsstatfs *sp;
+       int error;
+       struct nameidata nd;
+       vfs_context_t ctxp = vfs_context_current();
+       vnode_t vp;
  
  
-       if (sessp != SESSION_NULL)
-               session_rele(sessp);
-       return (0);
-bad:
-       if (deny_controlling_tty) {
-               session_lock(sessp);
-               sessp->s_flags &= ~S_NOCTTY;
-               session_unlock(sessp);
-       }
-       if (sessp != SESSION_NULL)
-               session_rele(sessp);
+       NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1, 
+               UIO_USERSPACE, uap->path, ctxp);
+       error = namei(&nd);
+       if (error)
+               return (error);
+       vp = nd.ni_vp;
+       mp = vp->v_mount;
+       sp = &mp->mnt_vfsstat;
+       nameidone(&nd);
  
  
-       /* Modify local copy (to not damage thread copy) */
-       context.vc_ucred = fp->f_fglob->fg_cred;
+       error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
+       if (error != 0) { 
+               vnode_put(vp);
+               return (error);
+       }
  
  
-       vn_close(vp, fp->f_fglob->fg_flag, &context);
+       error = statfs64_common(mp, sp, uap->buf);
         vnode_put(vp);
         vnode_put(vp);
-       fp_free(p, indx, fp);
  
         return (error);
  
         return (error);
-
  }
  
  }
  
-/*
- * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
- *
- * Parameters: p                       Process requesting the open
- *             uap                     User argument descriptor (see below)
- *             retval                  Pointer to an area to receive the
- *                                     return calue from the system call
- *
- * Indirect:   uap->path               Path to open (same as 'open')
- *             uap->flags              Flags to open (same as 'open'
- *             uap->uid                UID to set, if creating
- *             uap->gid                GID to set, if creating
- *             uap->mode               File mode, if creating (same as 'open')
- *             uap->xsecurity          ACL to set, if creating
- *
- * Returns:    0                       Success
- *             !0                      errno value
- *
- * Notes:      The kauth_filesec_t in 'va', if any, is in host byte order.
- *
- * XXX:                We should enummerate the possible errno values here, and where
- *             in the code they originated.
+/* 
+ * Get file system statistics in 64-bit mode for the file referenced by an open file descriptor
   */
  int
   */
  int
-open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
+fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
  {
  {
-       struct filedesc *fdp = p->p_fd;
-       int ciferror;
-       kauth_filesec_t xsecdst;
-       struct vnode_attr va;
-       struct nameidata nd;
-       int cmode;
+       struct vnode *vp;
+       struct mount *mp;
+       struct vfsstatfs *sp;
+       int error;
  
  
-       AUDIT_ARG(owner, uap->uid, uap->gid);
+       AUDIT_ARG(fd, uap->fd);
  
  
-       xsecdst = NULL;
-       if ((uap->xsecurity != USER_ADDR_NULL) &&
-           ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
-               return ciferror;
+       if ( (error = file_vnode(uap->fd, &vp)) )
+               return (error);
  
  
-       VATTR_INIT(&va);
-       cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
-       VATTR_SET(&va, va_mode, cmode);
-       if (uap->uid != KAUTH_UID_NONE)
-               VATTR_SET(&va, va_uid, uap->uid);
-       if (uap->gid != KAUTH_GID_NONE)
-               VATTR_SET(&va, va_gid, uap->gid);
-       if (xsecdst != NULL)
-               VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+       error = vnode_getwithref(vp);
+       if (error) {
+               file_drop(uap->fd);
+               return (error);
+       }
  
  
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
+       AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
  
  
-       ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
-       if (xsecdst != NULL)
-               kauth_filesec_free(xsecdst);
+       mp = vp->v_mount;
+       if (!mp) {
+               error = EBADF;
+               goto out;
+       }
+       sp = &mp->mnt_vfsstat;
+       if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
+               goto out;
+       }
  
  
-       return ciferror;
-}
+       error = statfs64_common(mp, sp, uap->buf);
  
  
-int
-open(proc_t p, struct open_args *uap, int32_t *retval)
-{
-       __pthread_testcancel(1);
-       return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
+out:
+       file_drop(uap->fd);
+       vnode_put(vp);
+
+       return (error);
  }
  
  }
  
-int
-open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
+struct getfsstat_struct {
+       user_addr_t     sfsp;           /* user-space output cursor; callbacks advance it past each entry copied out */
+       user_addr_t     *mp;            /* cursor into an array of user addresses handed to mac_mount_label_get(), or NULL */
+       int             count;          /* mounts visited so far; incremented even once maxcount is reached */
+       int             maxcount;       /* number of entries that fit in the caller's buffer */
+       int             flags;          /* caller's flags; tested for MNT_NOWAIT / MNT_WAIT / MNT_DWAIT */
+       int             error;          /* error recorded by a callback just before it returns VFS_RETURNED_DONE */
+};
+
+
+static int
+getfsstat_callback(mount_t mp, void * arg)
  {
  {
-       struct filedesc *fdp = p->p_fd;
-       struct vnode_attr va;
-       struct nameidata nd;
-       int cmode;
+       
+       struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
+       struct vfsstatfs *sp;
+       int error, my_size;
+       vfs_context_t ctx = vfs_context_current();
  
  
-       VATTR_INIT(&va);
-       /* Mask off all but regular access permissions */
-       cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
-       VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
+       if (fstp->sfsp && fstp->count < fstp->maxcount) {
+               sp = &mp->mnt_vfsstat;
+               /*
+                * If MNT_NOWAIT is specified, do not refresh the
+                * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
+                */
+               if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
+                       (error = vfs_update_vfsstat(mp, ctx,
+                           VFS_USER_EVENT))) {
+                       KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
+                       return(VFS_RETURNED);
+               }
  
  
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
+               /*
+                * Need to handle LP64 version of struct statfs
+                */
+               error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
+               if (error) {
+                       fstp->error = error;
+                       return(VFS_RETURNED_DONE);
+               }
+               fstp->sfsp += my_size;
  
  
-       return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
+               if (fstp->mp) {
+#if CONFIG_MACF
+                       error = mac_mount_label_get(mp, *fstp->mp);
+                       if (error) {
+                               fstp->error = error;
+                               return(VFS_RETURNED_DONE);
+                       }
+#endif
+                       fstp->mp++;
+               }
+       }
+       fstp->count++;
+       return(VFS_RETURNED);
  }
  
  }
  
-
  /*
  /*
- * Create a special file.
+ * Get statistics on all filesystems.
   */
   */
-static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
-
  int
  int
-mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
+getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
  {
  {
-       struct vnode_attr va;
-       vfs_context_t ctx = vfs_context_current();
-       int error;
-       int whiteout = 0;
-       struct nameidata nd;
-       vnode_t vp, dvp;
+       struct __mac_getfsstat_args muap;
  
  
-       VATTR_INIT(&va);
-       VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
-       VATTR_SET(&va, va_rdev, uap->dev);
+       muap.buf = uap->buf;
+       muap.bufsize = uap->bufsize;
+       muap.mac = USER_ADDR_NULL;
+       muap.macsize = 0;
+       muap.flags = uap->flags;
  
  
-       /* If it's a mknod() of a FIFO, call mkfifo1() instead */
-       if ((uap->mode & S_IFMT) == S_IFIFO)
-               return(mkfifo1(ctx, uap->path, &va));
+       return (__mac_getfsstat(p, &muap, retval));
+}
  
  
-       AUDIT_ARG(mode, uap->mode);
-       AUDIT_ARG(value32, uap->dev);
+/*
+ * __mac_getfsstat: Get MAC-related file system statistics
+ *
+ * Parameters:    p                        Calling process; its 32/64-bit ABI selects the user statfs layout
+ *                uap                      User argument descriptor (see below)
+ *                retval                   Count of file system statistics (N stats)  
+ *
+ * Indirect:      uap->bufsize             Buffer size
+ *                uap->macsize             MAC info size
+ *                uap->buf                 Buffer where information will be returned
+ *                uap->mac                 MAC info
+ *                uap->flags               File system flags
+ *                
+ *
+ * Returns:        0                       Success
+ *                !0                       errno value (EINVAL, ENOMEM, a copyin error, or an error recorded by the per-mount callback)
+ *
+ */
+int
+__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
+{
+       user_addr_t sfsp;
+       user_addr_t *mp;
+       size_t count, maxcount, bufsize, macsize;
+       struct getfsstat_struct fst;
  
  
-       if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
-               return (error);
-       NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctx);
-       error = namei(&nd);
-       if (error)
-               return (error);
-       dvp = nd.ni_dvp;
-       vp = nd.ni_vp;
+       bufsize = (size_t) uap->bufsize;
+       macsize = (size_t) uap->macsize;
  
  
-       if (vp != NULL) {
-               error = EEXIST;
-               goto out;
+       if (IS_64BIT_PROCESS(p)) {
+               maxcount = bufsize / sizeof(struct user64_statfs);
         }
         }
-
-       switch (uap->mode & S_IFMT) {
-       case S_IFMT:    /* used by badsect to flag bad sectors */
-               VATTR_SET(&va, va_type, VBAD);
-               break;
-       case S_IFCHR:
-               VATTR_SET(&va, va_type, VCHR);
-               break;
-       case S_IFBLK:
-               VATTR_SET(&va, va_type, VBLK);
-               break;
-       case S_IFWHT:
-               whiteout = 1;
-               break;
-       default:
-               error = EINVAL;
-               goto out;
+       else {
+               maxcount = bufsize / sizeof(struct user32_statfs);
         }
         }
+       sfsp = uap->buf;
+       count = 0;
+
+       mp = NULL;
  
  #if CONFIG_MACF
  
  #if CONFIG_MACF
-       if (!whiteout) {
-               error = mac_vnode_check_create(ctx,
-                   nd.ni_dvp, &nd.ni_cnd, &va);
-               if (error)
-                       goto out;
-       }
-#endif
+       if (uap->mac != USER_ADDR_NULL) {
+               u_int32_t *mp0;
+               int error;
+               unsigned int i;
  
  
-       if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
-               goto out;
+               count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
+               if (count != maxcount)
+                       return (EINVAL);
  
  
-       if (whiteout) {
-               error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, ctx);
-       } else {
-               error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
-       }
-       if (error)
-               goto out;
+               /* Copy in the array */
+               MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
+               if (mp0 == NULL) {
+                       return (ENOMEM);
+               }
  
  
-       if (vp) {
-               int     update_flags = 0;
+               error = copyin(uap->mac, mp0, macsize);
+               if (error) {
+                       FREE(mp0, M_MACTEMP);
+                       return (error);
+               }
  
  
-               // Make sure the name & parent pointers are hooked up
-               if (vp->v_name == NULL)
-                       update_flags |= VNODE_UPDATE_NAME;
-               if (vp->v_parent == NULLVP)
-                       update_flags |= VNODE_UPDATE_PARENT;
+               /* Normalize to an array of user_addr_t */
+               MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
+               if (mp == NULL) {
+                       FREE(mp0, M_MACTEMP);
+                       return (ENOMEM);
+               }
  
  
-               if (update_flags)
-                       vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
+               for (i = 0; i < count; i++) {
+                       if (IS_64BIT_PROCESS(p))
+                               mp[i] = ((user_addr_t *)mp0)[i];
+                       else
+                               mp[i] = (user_addr_t)mp0[i];
+               }
+               FREE(mp0, M_MACTEMP);
+       }
+#endif
  
  
-#if CONFIG_FSE
-               add_fsevent(FSE_CREATE_FILE, ctx,
-                   FSE_ARG_VNODE, vp,
-                   FSE_ARG_DONE);
-#endif
-       }
  
  
-out:
-       /*
-        * nameidone has to happen before we vnode_put(dvp)
-        * since it may need to release the fs_nodelock on the dvp
-        */
-       nameidone(&nd);
+       fst.sfsp = sfsp;
+       fst.mp = mp;
+       fst.flags = uap->flags;
+       fst.count = 0;
+       fst.error = 0;
+       fst.maxcount = maxcount;
  
  
-       if (vp)
-               vnode_put(vp);
-       vnode_put(dvp);
+       
+       vfs_iterate(0, getfsstat_callback, &fst);
  
  
-       return (error);
+       if (mp)
+               FREE(mp, M_MACTEMP);
+
+       if (fst.error ) {
+               KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
+               return(fst.error);
+       }
+
+       if (fst.sfsp && fst.count > fst.maxcount)
+               *retval = fst.maxcount;
+       else
+               *retval = fst.count;
+       return (0);
  }
  
  }
  
-/*
- * Create a named pipe.
- *
- * Returns:    0                       Success
- *             EEXIST
- *     namei:???
- *     vnode_authorize:???
- *     vn_create:???
- */
  static int
  static int
-mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
+getfsstat64_callback(mount_t mp, void * arg)
  {
  {
-       vnode_t vp, dvp;
+       struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
+       struct vfsstatfs *sp;
         int error;
         int error;
-       struct nameidata nd;
  
  
-       NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, 
-               UIO_USERSPACE, upath, ctx);
-       error = namei(&nd);
-       if (error)
-               return (error);
-       dvp = nd.ni_dvp;
-       vp = nd.ni_vp;
+       if (fstp->sfsp && fstp->count < fstp->maxcount) {
+               sp = &mp->mnt_vfsstat;
+               /*
+                * If MNT_NOWAIT is specified, do not refresh the fsstat
+                * cache. MNT_WAIT overrides MNT_NOWAIT.
+                *
+                * We treat MNT_DWAIT as MNT_WAIT for all instances of
+                * getfsstat, since the constants are out of the same
+                * namespace.
+                */
+               if (((fstp->flags & MNT_NOWAIT) == 0 ||
+                    (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
+                   (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
+                       KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
+                       return(VFS_RETURNED);
+               }
  
  
-       /* check that this is a new file and authorize addition */
-       if (vp != NULL) {
-               error = EEXIST;
-               goto out;
-       }
-       VATTR_SET(vap, va_type, VFIFO);
+               error = statfs64_common(mp, sp, fstp->sfsp);
+               if (error) {
+                       fstp->error = error;
+                       return(VFS_RETURNED_DONE);
+               }
+               fstp->sfsp += sizeof(struct statfs64);
+       }
+       fstp->count++;
+       return(VFS_RETURNED);
+}
  
  
-#if CONFIG_MACF
-       error = mac_vnode_check_create(ctx, nd.ni_dvp,
-           &nd.ni_cnd, vap);
-       if (error)
-               goto out;
-#endif
+/*
+ * Get statistics on all file systems in 64 bit mode.
+ */
+int
+getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
+{
+       user_addr_t sfsp;
+       int count, maxcount;
+       struct getfsstat_struct fst;
  
  
+       maxcount = uap->bufsize / sizeof(struct statfs64);
  
  
-       if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
-               goto out;
+       sfsp = uap->buf;
+       count = 0;
  
  
-       
-       error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx);
-out:
-       /*
-        * nameidone has to happen before we vnode_put(dvp)
-        * since it may need to release the fs_nodelock on the dvp
-        */
-       nameidone(&nd);
+       fst.sfsp = sfsp;
+       fst.flags = uap->flags;
+       fst.count = 0;
+       fst.error = 0;
+       fst.maxcount = maxcount;
  
  
-       if (vp)
-               vnode_put(vp);
-       vnode_put(dvp);
+       vfs_iterate(0, getfsstat64_callback, &fst);
  
  
-       return error;
-}
+       if (fst.error ) {
+               KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
+               return(fst.error);
+       }
+
+       if (fst.sfsp && fst.count > fst.maxcount)
+               *retval = fst.maxcount;
+       else
+               *retval = fst.count;
  
  
+       return (0);
+}
  
  /*
  
  /*
- * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
- *
- * Parameters: p                       Process requesting the open
- *             uap                     User argument descriptor (see below)
- *             retval                  (Ignored)
- *
- * Indirect:   uap->path               Path to fifo (same as 'mkfifo')
- *             uap->uid                UID to set
- *             uap->gid                GID to set
- *             uap->mode               File mode to set (same as 'mkfifo')
- *             uap->xsecurity          ACL to set, if creating
+ * Gets the vnode associated with the file descriptor passed
+ * as input.
   *
   *
- * Returns:    0                       Success
- *             !0                      errno value
+ * INPUT
+ * ctx - vfs context of caller
+ * fd - file descriptor for which vnode is required.
+ * vpp - Pointer to pointer to vnode to be returned.
   *
   *
- * Notes:      The kauth_filesec_t in 'va', if any, is in host byte order.
+ * The vnode is returned with an iocount so any vnode obtained
+ * by this call needs a vnode_put
   *
   *
- * XXX:                We should enummerate the possible errno values here, and where
- *             in the code they originated.
   */
   */
-int
-mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
+static int
+vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
  {
  {
-       int ciferror;
-       kauth_filesec_t xsecdst;
-       struct vnode_attr va;
-
-       AUDIT_ARG(owner, uap->uid, uap->gid);
+       int error;
+       vnode_t vp;
+       struct fileproc *fp;
+       proc_t p = vfs_context_proc(ctx);
  
  
-       xsecdst = KAUTH_FILESEC_NONE;
-       if (uap->xsecurity != USER_ADDR_NULL) {
-               if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
-                       return ciferror;
-       }
+       *vpp =  NULLVP;
  
  
-       VATTR_INIT(&va);
-       VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
-       if (uap->uid != KAUTH_UID_NONE)
-               VATTR_SET(&va, va_uid, uap->uid);
-       if (uap->gid != KAUTH_GID_NONE)
-               VATTR_SET(&va, va_gid, uap->gid);
-       if (xsecdst != KAUTH_FILESEC_NONE)
-               VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+       error = fp_getfvp(p, fd, &fp, &vp);
+       if (error)
+               return (error);
  
  
-       ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
+       error = vnode_getwithref(vp);
+       if (error) {
+               (void)fp_drop(p, fd, fp, 0);
+               return (error);
+       }
  
  
-       if (xsecdst != KAUTH_FILESEC_NONE)
-               kauth_filesec_free(xsecdst);
-       return ciferror;
+       (void)fp_drop(p, fd, fp, 0);
+       *vpp = vp;
+       return (error);
  }
  
  }
  
-/* ARGSUSED */
+/*
+ * Wrapper function around namei to start lookup from a directory
+ * specified by the file descriptor dirfd.
+ *
+ * In addition to all the errors returned by namei, this call can
+ * return ENOTDIR if the file descriptor does not refer to a directory,
+ * and EBADF if the file descriptor is not valid.
+ */
  int
  int
-mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
+nameiat(struct nameidata *ndp, int dirfd)
  {
  {
-       struct vnode_attr va;
+       if ((dirfd != AT_FDCWD) &&
+           !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
+           !(ndp->ni_cnd.cn_flags & USEDVP)) {
+               int error = 0;
+               char c;
  
  
-       VATTR_INIT(&va);
-       VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
+               if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
+                       error = copyin(ndp->ni_dirp, &c, sizeof(char));
+                       if (error)
+                               return (error);
+               } else {
+                       c = *((char *)(ndp->ni_dirp));
+               }
  
  
-       return(mkfifo1(vfs_context_current(), uap->path, &va));
-}
+               if (c != '/') {
+                       vnode_t dvp_at;
  
  
+                       error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
+                           &dvp_at);
+                       if (error)
+                               return (error);
  
  
-static char *
-my_strrchr(char *p, int ch)
-{
-       char *save;
+                       if (vnode_vtype(dvp_at) != VDIR) {
+                               vnode_put(dvp_at);
+                               return (ENOTDIR);
+                       }
  
  
-       for (save = NULL;; ++p) {
-               if (*p == ch)
-                       save = p;
-               if (!*p)
-                       return(save);
+                       ndp->ni_dvp = dvp_at;
+                       ndp->ni_cnd.cn_flags |= USEDVP;
+                       error = namei(ndp);
+                       ndp->ni_cnd.cn_flags &= ~USEDVP;
+                       vnode_put(dvp_at);
+                       return (error);
+               }
         }
         }
-       /* NOTREACHED */
-}
  
  
-extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
+       return (namei(ndp));
+}
  
  
-int
-safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
+/*
+ * Change current working directory to a given file descriptor.
+ */
+/* ARGSUSED */
+static int
+common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
  {
  {
-       int ret, len = _len;
-
-       *truncated_path = 0;
-       ret = vn_getpath(dvp, path, &len);
-       if (ret == 0 && len < (MAXPATHLEN - 1)) {
-               if (leafname) {
-                       path[len-1] = '/';
-                       len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
-                       if (len > MAXPATHLEN) {
-                               char *ptr;
-                       
-                               // the string got truncated!
-                               *truncated_path = 1;
-                               ptr = my_strrchr(path, '/');
-                               if (ptr) {
-                                       *ptr = '\0';   // chop off the string at the last directory component
-                               }
-                               len = strlen(path) + 1;
-                       }
-               }
-       } else if (ret == 0) {
-               *truncated_path = 1;
-       } else if (ret != 0) {
-               struct vnode *mydvp=dvp;
+       struct filedesc *fdp = p->p_fd;
+       vnode_t vp;
+       vnode_t tdp;
+       vnode_t tvp;
+       struct mount *mp;
+       int error;
+       vfs_context_t ctx = vfs_context_current();
  
  
-               if (ret != ENOSPC) {
-                       printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
-                              dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
-               }                               
-               *truncated_path = 1;
-               
-               do {
-                       if (mydvp->v_parent != NULL) {
-                               mydvp = mydvp->v_parent;
-                       } else if (mydvp->v_mount) {
-                               strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
-                               break;
-                       } else {
-                               // no parent and no mount point?  only thing is to punt and say "/" changed
-                               strlcpy(path, "/", _len);
-                               len = 2;
-                               mydvp = NULL;
-                       }
-                       
-                       if (mydvp == NULL) {
-                               break;
+       AUDIT_ARG(fd, uap->fd);
+       if (per_thread && uap->fd == -1) {
+               /*
+                * Switching back from per-thread to per process CWD; verify we
+                * in fact have one before proceeding.  The only success case
+                * for this code path is to return 0 preemptively after zapping
+                * the thread structure contents.
+                */
+               thread_t th = vfs_context_thread(ctx);
+               if (th) {
+                       uthread_t uth = get_bsdthread_info(th);
+                       tvp = uth->uu_cdir;
+                       uth->uu_cdir = NULLVP;
+                       if (tvp != NULLVP) {
+                               vnode_rele(tvp);
+                               return (0);
                         }
                         }
+               }
+               return (EBADF);
+       }
  
  
-                       len = _len;
-                       ret = vn_getpath(mydvp, path, &len);
-               } while (ret == ENOSPC);
+       if ( (error = file_vnode(uap->fd, &vp)) )
+               return(error);
+       if ( (error = vnode_getwithref(vp)) ) {
+               file_drop(uap->fd);
+               return(error);
         }
  
         }
  
-       return len;
+       AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+       if (vp->v_type != VDIR) {
+               error = ENOTDIR;
+               goto out;
+       }
+
+#if CONFIG_MACF
+       error = mac_vnode_check_chdir(ctx, vp);
+       if (error)
+               goto out;
+#endif
+       error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
+       if (error)
+               goto out;
+
+       while (!error && (mp = vp->v_mountedhere) != NULL) {
+               if (vfs_busy(mp, LK_NOWAIT)) {
+                       error = EACCES;
+                       goto out;
+               }
+               error = VFS_ROOT(mp, &tdp, ctx);
+               vfs_unbusy(mp);
+               if (error)
+                       break;
+               vnode_put(vp);
+               vp = tdp;
+       }
+       if (error)
+               goto out;
+       if ( (error = vnode_ref(vp)) )
+               goto out;
+       vnode_put(vp);
+
+       if (per_thread) {
+               thread_t th = vfs_context_thread(ctx);
+               if (th) {
+                       uthread_t uth = get_bsdthread_info(th);
+                       tvp = uth->uu_cdir;
+                       uth->uu_cdir = vp;
+                       OSBitOrAtomic(P_THCWD, &p->p_flag);
+               } else {
+                       vnode_rele(vp);
+                       return (ENOENT);
+               }
+       } else {
+               proc_fdlock(p);
+               tvp = fdp->fd_cdir;
+               fdp->fd_cdir = vp;
+               proc_fdunlock(p);
+       }
+
+       if (tvp)
+               vnode_rele(tvp);
+       file_drop(uap->fd);
+
+       return (0);
+out:
+       vnode_put(vp);
+       file_drop(uap->fd);
+
+       return(error);
+}
+
+int
+fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
+{
+       return common_fchdir(p, uap, 0);
  }
  
  }
  
+int
+__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
+{
+       return common_fchdir(p, (void *)uap, 1);
+}
  
  /*
  
  /*
- * Make a hard file link.
+ * Change current working directory (".").
   *
   * Returns:    0                       Success
   *
   * Returns:    0                       Success
- *             EPERM
- *             EEXIST
- *             EXDEV
- *     namei:???
- *     vnode_authorize:???
- *     VNOP_LINK:???
+ *     change_dir:ENOTDIR
+ *     change_dir:???
+ *     vnode_ref:ENOENT                No such file or directory
   */
  /* ARGSUSED */
   */
  /* ARGSUSED */
-int
-link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
+static int
+common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
  {
  {
-       vnode_t vp, dvp, lvp;
+       struct filedesc *fdp = p->p_fd;
+       int error;
         struct nameidata nd;
         struct nameidata nd;
+       vnode_t tvp;
         vfs_context_t ctx = vfs_context_current();
         vfs_context_t ctx = vfs_context_current();
-       int error;
-#if CONFIG_FSE
-       fse_info finfo;
-#endif
-       int need_event, has_listeners;
-       char *target_path = NULL;
-       int truncated=0;
  
  
-       vp = dvp = lvp = NULLVP;
-
-       /* look up the object we are linking to */
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+       NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1, 
                 UIO_USERSPACE, uap->path, ctx);
                 UIO_USERSPACE, uap->path, ctx);
-       error = namei(&nd);
+       error = change_dir(&nd, ctx);
         if (error)
                 return (error);
         if (error)
                 return (error);
-       vp = nd.ni_vp;
-
-       nameidone(&nd);
-
+       if ( (error = vnode_ref(nd.ni_vp)) ) {
+               vnode_put(nd.ni_vp);
+               return (error);
+       }
         /*
         /*
-        * Normally, linking to directories is not supported.
-        * However, some file systems may have limited support.
+        * drop the iocount we picked up in change_dir
          */
          */
-       if (vp->v_type == VDIR) {
-               if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
-                       error = EPERM;   /* POSIX */
-                       goto out;
-               }
-               /* Linking to a directory requires ownership. */
-               if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
-                       struct vnode_attr dva;
+       vnode_put(nd.ni_vp);
  
  
-                       VATTR_INIT(&dva);
-                       VATTR_WANTED(&dva, va_uid);
-                       if (vnode_getattr(vp, &dva, ctx) != 0 ||
-                           !VATTR_IS_SUPPORTED(&dva, va_uid) ||
-                           (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
-                               error = EACCES;
-                               goto out;
-                       }
+       if (per_thread) {
+               thread_t th = vfs_context_thread(ctx);
+               if (th) {
+                       uthread_t uth = get_bsdthread_info(th);
+                       tvp = uth->uu_cdir;
+                       uth->uu_cdir = nd.ni_vp;
+                       OSBitOrAtomic(P_THCWD, &p->p_flag);
+               } else {
+                       vnode_rele(nd.ni_vp);
+                       return (ENOENT);
                 }
                 }
+       } else {
+               proc_fdlock(p);
+               tvp = fdp->fd_cdir;
+               fdp->fd_cdir = nd.ni_vp;
+               proc_fdunlock(p);
         }
  
         }
  
-       /* lookup the target node */
-       nd.ni_cnd.cn_nameiop = CREATE;
-       nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
-       nd.ni_dirp = uap->link;
-       error = namei(&nd);
-       if (error != 0)
-               goto out;
-       dvp = nd.ni_dvp;
-       lvp = nd.ni_vp;
-
-#if CONFIG_MACF
-       if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
-               goto out2;
-#endif
+       if (tvp)
+               vnode_rele(tvp);
  
  
-       /* or to anything that kauth doesn't want us to (eg. immutable items) */
-       if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
-               goto out2;
+       return (0);
+}
  
  
-       /* target node must not exist */
-       if (lvp != NULLVP) {
-               error = EEXIST;
-               goto out2;
-       }
-       /* cannot link across mountpoints */
-       if (vnode_mount(vp) != vnode_mount(dvp)) {
-               error = EXDEV;
-               goto out2;
-       }
-               
-       /* authorize creation of the target note */
-       if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
-               goto out2;
  
  
-       /* and finally make the link */
-       error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
-       if (error)
-               goto out2;
+/*
+ * chdir
+ *
+ * Change current working directory (".") for the entire process
+ *
+ * Parameters:  p       Process requesting the call
+ *             uap     User argument descriptor (see below)
+ *             retval  (ignored)
+ *
+ * Indirect parameters:        uap->path       Directory path
+ *
+ * Returns:    0                       Success
+ *             common_chdir: ENOTDIR
+ *             common_chdir: ENOENT    No such file or directory
+ *             common_chdir: ???
+ *
+ */
+int
+chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
+{
+       return common_chdir(p, (void *)uap, 0);
+}
  
  
-#if CONFIG_FSE
-       need_event = need_fsevent(FSE_CREATE_FILE, dvp);
-#else
-       need_event = 0;
-#endif
-       has_listeners = kauth_authorize_fileop_has_listeners();
-
-       if (need_event || has_listeners) {
-               char *link_to_path = NULL;
-               int len, link_name_len;
-
-               /* build the path to the new link file */
-               GET_PATH(target_path);
-               if (target_path == NULL) {
-                       error = ENOMEM;
-                       goto out2;
-               }
-
-               len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
-
-               if (has_listeners) {
-                       /* build the path to file we are linking to */
-                       GET_PATH(link_to_path);
-                       if (link_to_path == NULL) {
-                               error = ENOMEM;
-                               goto out2;
-                       }
-
-                       link_name_len = MAXPATHLEN;
-                       vn_getpath(vp, link_to_path, &link_name_len);
-
-                       /*
-                        * Call out to allow 3rd party notification of rename. 
-                        * Ignore result of kauth_authorize_fileop call.
-                        */
-                       kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK, 
-                                              (uintptr_t)link_to_path, (uintptr_t)target_path);
-                       if (link_to_path != NULL) {
-                               RELEASE_PATH(link_to_path);
-                       }
-               }
-#if CONFIG_FSE
-               if (need_event) {
-                       /* construct fsevent */
-                       if (get_fse_info(vp, &finfo, ctx) == 0) {
-                               if (truncated) {
-                                       finfo.mode |= FSE_TRUNCATED_PATH;
-                               }
-
-                               // build the path to the destination of the link
-                               add_fsevent(FSE_CREATE_FILE, ctx,
-                                           FSE_ARG_STRING, len, target_path,
-                                           FSE_ARG_FINFO, &finfo,
-                                           FSE_ARG_DONE);
-                       }
-                       if (vp->v_parent) {
-                           add_fsevent(FSE_STAT_CHANGED, ctx,
-                               FSE_ARG_VNODE, vp->v_parent,
-                               FSE_ARG_DONE);
-                       }
-               }
-#endif
-       }
-out2:
-       /*
-        * nameidone has to happen before we vnode_put(dvp)
-        * since it may need to release the fs_nodelock on the dvp
-        */
-       nameidone(&nd);
-       if (target_path != NULL) {
-               RELEASE_PATH(target_path);
-       }
-out:
-       if (lvp)
-               vnode_put(lvp);
-       if (dvp)
-               vnode_put(dvp);
-       vnode_put(vp);
-       return (error);
+/*
+ * __pthread_chdir
+ *
+ * Change current working directory (".") for a single thread
+ *
+ * Parameters:  p       Process requesting the call
+ *             uap     User argument descriptor (see below)
+ *             retval  (ignored)
+ *
+ * Indirect parameters:        uap->path       Directory path
+ *
+ * Returns:    0                       Success
+ *             common_chdir: ENOTDIR
+ *             common_chdir: ENOENT    No such file or directory
+ *             common_chdir: ???
+ *
+ */
+int
+__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
+{
+       return common_chdir(p, (void *)uap, 1);
  }
  
  }
  
+
  /*
  /*
- * Make a symbolic link.
- *
- * We could add support for ACLs here too...
+ * Change notion of root (``/'') directory.
   */
  /* ARGSUSED */
  int
   */
  /* ARGSUSED */
  int
-symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
+chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
  {
  {
-       struct vnode_attr va;
-       char *path;
+       struct filedesc *fdp = p->p_fd;
         int error;
         struct nameidata nd;
         int error;
         struct nameidata nd;
+       vnode_t tvp;
         vfs_context_t ctx = vfs_context_current();
         vfs_context_t ctx = vfs_context_current();
-       vnode_t vp, dvp;
-       size_t dummy=0;
-       
-       MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
-       error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
-       if (error)
-               goto out;
-       AUDIT_ARG(text, path);  /* This is the link string */
  
  
-       NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->link, ctx);
-       error = namei(&nd);
+       if ((error = suser(kauth_cred_get(), &p->p_acflag)))
+               return (error);
+
+       NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1, 
+               UIO_USERSPACE, uap->path, ctx);
+       error = change_dir(&nd, ctx);
         if (error)
         if (error)
-               goto out;
-       dvp = nd.ni_dvp;
-       vp = nd.ni_vp;
+               return (error);
  
  
-       VATTR_INIT(&va);
-       VATTR_SET(&va, va_type, VLNK);
-       VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
  #if CONFIG_MACF
  #if CONFIG_MACF
-       error = mac_vnode_check_create(ctx,
-                       dvp, &nd.ni_cnd, &va);
-#endif
-       if (error != 0) {
-           goto skipit;
+       error = mac_vnode_check_chroot(ctx, nd.ni_vp,
+           &nd.ni_cnd);
+       if (error) {
+               vnode_put(nd.ni_vp);
+               return (error);
         }
         }
+#endif
  
  
-       if (vp != NULL) {
-           error = EEXIST;
-           goto skipit;
+       if ( (error = vnode_ref(nd.ni_vp)) ) {
+               vnode_put(nd.ni_vp);
+               return (error);
         }
         }
+       vnode_put(nd.ni_vp);
  
  
-       /* authorize */
-       if (error == 0)
-               error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
-       /* get default ownership, etc. */
-       if (error == 0)
-               error = vnode_authattr_new(dvp, &va, 0, ctx);
-       if (error == 0)
-               error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
+       proc_fdlock(p);
+       tvp = fdp->fd_rdir;
+       fdp->fd_rdir = nd.ni_vp;
+       fdp->fd_flags |= FD_CHROOT;
+       proc_fdunlock(p);
  
  
-       /* do fallback attribute handling */
-       if (error == 0)
-               error = vnode_setattr_fallback(vp, &va, ctx);
-               
-       if (error == 0) {
-               int     update_flags = 0;
+       if (tvp != NULL)
+               vnode_rele(tvp);
  
  
-               if (vp == NULL) {
-                       nd.ni_cnd.cn_nameiop = LOOKUP;
-                       nd.ni_cnd.cn_flags = 0;
-                       error = namei(&nd);
-                       vp = nd.ni_vp;
+       return (0);
+}
  
  
-                       if (vp == NULL)
-                               goto skipit;
-               }
-                       
-#if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
-               /* call out to allow 3rd party notification of rename. 
-                * Ignore result of kauth_authorize_fileop call.
-                */
-               if (kauth_authorize_fileop_has_listeners() &&
-                   namei(&nd) == 0) {
-                       char *new_link_path = NULL;
-                       int             len;
-                               
-                       /* build the path to the new link file */
-                       new_link_path = get_pathbuff();
-                       len = MAXPATHLEN;
-                       vn_getpath(dvp, new_link_path, &len);
-                       if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
-                               new_link_path[len - 1] = '/';
-                               strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
-                       }
-                               
-                       kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK, 
-                                          (uintptr_t)path, (uintptr_t)new_link_path);
-                       if (new_link_path != NULL)
-                               release_pathbuff(new_link_path);
-               }
-#endif 
-               // Make sure the name & parent pointers are hooked up
-               if (vp->v_name == NULL)
-                       update_flags |= VNODE_UPDATE_NAME;
-               if (vp->v_parent == NULLVP)
-                       update_flags |= VNODE_UPDATE_PARENT;
-               
-               if (update_flags)
-                       vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
+/*
+ * Common routine for chroot and chdir.
+ *
+ * Returns:    0                       Success
+ *             ENOTDIR                 Not a directory
+ *             namei:???               [anything namei can return]
+ *             vnode_authorize:???     [anything vnode_authorize can return]
+ */
+static int
+change_dir(struct nameidata *ndp, vfs_context_t ctx)
+{
+       vnode_t vp;
+       int error;
  
  
-#if CONFIG_FSE
-               add_fsevent(FSE_CREATE_FILE, ctx,
-                           FSE_ARG_VNODE, vp,
-                           FSE_ARG_DONE);
-#endif
+       if ((error = namei(ndp)))
+               return (error);
+       nameidone(ndp);
+       vp = ndp->ni_vp;
+
+       if (vp->v_type != VDIR) {
+               vnode_put(vp);
+               return (ENOTDIR);
         }
  
         }
  
-skipit:
-       /*
-        * nameidone has to happen before we vnode_put(dvp)
-        * since it may need to release the fs_nodelock on the dvp
-        */
-       nameidone(&nd);
+#if CONFIG_MACF
+       error = mac_vnode_check_chdir(ctx, vp);
+       if (error) {
+               vnode_put(vp);
+               return (error);
+       }
+#endif
  
  
-       if (vp)
-               vnode_put(vp);
-       vnode_put(dvp);
-out:
-       FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
+       error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
+       if (error) {
+               vnode_put(vp);
+               return (error);
+       }
  
         return (error);
  }
  
  /*
  
         return (error);
  }
  
  /*
- * Delete a whiteout from the filesystem.
- * XXX authorization not implmented for whiteouts
+ * Allocate the vnode data (for directories) associated with the file glob.
   */
   */
-int
-undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
+struct fd_vn_data *
+fg_vn_data_alloc(void)
  {
  {
-       int error;
-       struct nameidata nd;
-       vfs_context_t ctx = vfs_context_current();
-       vnode_t vp, dvp;
-
-       NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctx);
-       error = namei(&nd);
-       if (error)
-               return (error);
-       dvp = nd.ni_dvp;
-       vp = nd.ni_vp;
-
-       if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
-               error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
-       } else
-               error = EEXIST;
+       struct fd_vn_data *fvdata;
  
  
-       /*
-        * nameidone has to happen before we vnode_put(dvp)
-        * since it may need to release the fs_nodelock on the dvp
-        */
-       nameidone(&nd);
+       /* Allocate per fd vnode data */
+       MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
+              M_FD_VN_DATA, M_WAITOK | M_ZERO);
+       lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
+       return fvdata;
+}
  
  
-       if (vp)
-               vnode_put(vp);
-       vnode_put(dvp);
+/*
+ * Free the vnode data (for directories) associated with the file glob.
+ */
+void
+fg_vn_data_free(void *fgvndata)
+{
+       struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
  
  
-       return (error);
+       if (fvdata->fv_buf)
+               FREE(fvdata->fv_buf, M_FD_DIRBUF);
+       lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
+       FREE(fvdata, M_FD_VN_DATA);
  }
  
  }
  
-
  /*
  /*
- * Delete a name from the filesystem.
+ * Check permissions, allocate an open file structure,
+ * and call the device open routine if any.
+ *
+ * Returns:    0                       Success
+ *             EINVAL
+ *             EINTR
+ *     falloc:ENFILE
+ *     falloc:EMFILE
+ *     falloc:ENOMEM
+ *     vn_open_auth:???
+ *     dupfdopen:???
+ *     VNOP_ADVLOCK:???
+ *     vnode_setsize:???
+ *
+ * XXX Need to implement uid, gid
   */
   */
-/* ARGSUSED */
  int
  int
-unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
+open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
+    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
+    int32_t *retval)
  {
  {
-       vnode_t vp, dvp;
-       int error;
-       struct componentname *cnp;
-       char  *path = NULL;
-       int  len=0;
-#if CONFIG_FSE
-       fse_info  finfo;
-#endif
-       int flags = 0;
-       int need_event = 0;
-       int has_listeners = 0;
-       int truncated_path=0;
-#if NAMEDRSRCFORK
-       /* unlink or delete is allowed on rsrc forks and named streams */
-       ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
-#endif
+       proc_t p = vfs_context_proc(ctx);
+       uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
+       struct fileproc *fp;
+       vnode_t vp;
+       int flags, oflags;
+       int type, indx, error;
+       struct flock lf;
+       int no_controlling_tty = 0;
+       int deny_controlling_tty = 0;
+       struct session *sessp = SESSION_NULL;
  
  
-       ndp->ni_cnd.cn_flags |= LOCKPARENT;
-       cnp = &ndp->ni_cnd;
+       oflags = uflags;
  
  
-       error = namei(ndp);
-       if (error)
-               return (error);
+       if ((oflags & O_ACCMODE) == O_ACCMODE)
+               return(EINVAL);
+       flags = FFLAGS(uflags);
  
  
-       dvp = ndp->ni_dvp;
-       vp = ndp->ni_vp;
+       AUDIT_ARG(fflags, oflags);
+       AUDIT_ARG(mode, vap->va_mode);
  
  
-       /* With Carbon delete semantics, busy files cannot be deleted */
-       if (nodelbusy) {
-               flags |= VNODE_REMOVE_NODELETEBUSY;
+       if ((error = falloc_withalloc(p,
+           &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
+               return (error);
         }
         }
+       uu->uu_dupfd = -indx - 1;
  
  
-       /*
-        * Normally, unlinking of directories is not supported. 
-        * However, some file systems may have limited support.
-        */
-       if ((vp->v_type == VDIR) &&
-           !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
-               error = EPERM;  /* POSIX */
+       if (!(p->p_flag & P_CONTROLT)) {
+               sessp = proc_session(p);
+               no_controlling_tty = 1;
+               /*
+                * If conditions would warrant getting a controlling tty if
+                * the device being opened is a tty (see ttyopen in tty.c),
+                * but the open flags deny it, set a flag in the session to
+                * prevent it.
+                */
+               if (SESS_LEADER(p, sessp) &&
+                   sessp->s_ttyvp == NULL &&
+                   (flags & O_NOCTTY)) {
+                       session_lock(sessp);
+                       sessp->s_flags |= S_NOCTTY;
+                       session_unlock(sessp);
+                       deny_controlling_tty = 1;
+               }
         }
  
         }
  
-       /*
-        * The root of a mounted filesystem cannot be deleted.
-        */
-       if (vp->v_flag & VROOT) {
-               error = EBUSY;
+       if ((error = vn_open_auth(ndp, &flags, vap))) {
+               if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){        /* XXX from fdopen */
+                       if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
+                               fp_drop(p, indx, NULL, 0);
+                               *retval = indx;
+                               if (deny_controlling_tty) {
+                                       session_lock(sessp);
+                                       sessp->s_flags &= ~S_NOCTTY;
+                                       session_unlock(sessp);
+                               }
+                               if (sessp != SESSION_NULL)
+                                       session_rele(sessp);
+                               return (0);
+                       }
+               }
+               if (error == ERESTART)
+                       error = EINTR;
+               fp_free(p, indx, fp);
+
+               if (deny_controlling_tty) {
+                       session_lock(sessp);
+                       sessp->s_flags &= ~S_NOCTTY;
+                       session_unlock(sessp);
+               }
+               if (sessp != SESSION_NULL)
+                       session_rele(sessp);
+               return (error);
         }
         }
-       if (error)
-               goto out;
+       uu->uu_dupfd = 0;
+       vp = ndp->ni_vp;
  
  
+       fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
+       fp->f_fglob->fg_ops = &vnops;
+       fp->f_fglob->fg_data = (caddr_t)vp;
  
  
-       /* authorize the delete operation */
-#if CONFIG_MACF
-       if (!error)
-               error = mac_vnode_check_unlink(ctx,
-                   dvp, vp, cnp);
-#endif /* MAC */
-       if (!error)
-               error = vnode_authorize(vp, ndp->ni_dvp, KAUTH_VNODE_DELETE, ctx);
-       if (error)
-               goto out;
-       
-#if CONFIG_FSE
-       need_event = need_fsevent(FSE_DELETE, dvp);
-       if (need_event) {
-               if ((vp->v_flag & VISHARDLINK) == 0) {
-                       get_fse_info(vp, &finfo, ctx);
+#if CONFIG_PROTECT
+       if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
+               if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
+                       fp->f_fglob->fg_flag |= FENCRYPTED;
                 }
         }
  #endif
                 }
         }
  #endif
-       has_listeners = kauth_authorize_fileop_has_listeners();
-       if (need_event || has_listeners) {
-               GET_PATH(path);
-               if (path == NULL) {
-                       error = ENOMEM;
-                       goto out;
-               }
  
  
-               len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
+       if (flags & (O_EXLOCK | O_SHLOCK)) {
+               lf.l_whence = SEEK_SET;
+               lf.l_start = 0;
+               lf.l_len = 0;
+               if (flags & O_EXLOCK)
+                       lf.l_type = F_WRLCK;
+               else
+                       lf.l_type = F_RDLCK;
+               type = F_FLOCK;
+               if ((flags & FNONBLOCK) == 0)
+                       type |= F_WAIT;
+#if CONFIG_MACF
+               error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
+                   F_SETLK, &lf);
+               if (error)
+                       goto bad;
+#endif
+               if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
+                       goto bad;
+               fp->f_fglob->fg_flag |= FHASLOCK;
         }
  
         }
  
-#if NAMEDRSRCFORK
-       if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
-               error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
-       else
-#endif
-               error = VNOP_REMOVE(dvp, vp, &ndp->ni_cnd, flags, ctx);
+       /* try to truncate by setting the size attribute */
+       if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
+               goto bad;
  
         /*
  
         /*
-        * Call out to allow 3rd party notification of delete. 
-        * Ignore result of kauth_authorize_fileop call.
+        * If the open flags denied the acquisition of a controlling tty,
+        * clear the flag in the session structure that prevented the lower
+        * level code from assigning one.
          */
          */
-       if (!error) {
-               if (has_listeners) {
-                       kauth_authorize_fileop(vfs_context_ucred(ctx), 
-                               KAUTH_FILEOP_DELETE, 
-                               (uintptr_t)vp,
-                               (uintptr_t)path);
-               }
+       if (deny_controlling_tty) {
+               session_lock(sessp);
+               sessp->s_flags &= ~S_NOCTTY;
+               session_unlock(sessp);
+       }
  
  
-               if (vp->v_flag & VISHARDLINK) {
-                   //
-                   // if a hardlink gets deleted we want to blow away the
-                   // v_parent link because the path that got us to this
-                   // instance of the link is no longer valid.  this will
-                   // force the next call to get the path to ask the file
-                   // system instead of just following the v_parent link.
-                   //
-                   vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
-               }
+       /*
+        * If a controlling tty was set by the tty line discipline, then we
+        * want to set the vp of the tty into the session structure.  We have
+        * a race here because we can't get to the vp for the tp in ttyopen,
+        * because it's not passed as a parameter in the open path.
+        */
+       if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
+               vnode_t ttyvp;
  
  
-#if CONFIG_FSE
-               if (need_event) {
-                       if (vp->v_flag & VISHARDLINK) {
-                               get_fse_info(vp, &finfo, ctx);
-                       }
-                       if (truncated_path) {
-                               finfo.mode |= FSE_TRUNCATED_PATH;
-                       }
-                       add_fsevent(FSE_DELETE, ctx,
-                                               FSE_ARG_STRING, len, path,
-                                               FSE_ARG_FINFO, &finfo,
-                                               FSE_ARG_DONE);
-               }
-#endif
+               session_lock(sessp);
+               ttyvp = sessp->s_ttyvp;
+               sessp->s_ttyvp = vp;
+               sessp->s_ttyvid = vnode_vid(vp);
+               session_unlock(sessp);
         }
         }
-       if (path != NULL)
-               RELEASE_PATH(path);
  
         /*
  
         /*
-        * nameidone has to happen before we vnode_put(dvp)
-        * since it may need to release the fs_nodelock on the dvp
-        */
-out:
-#if NAMEDRSRCFORK
-       /* recycle the deleted rsrc fork vnode to force a reclaim, which 
-        * will cause its shadow file to go away if necessary.
+        * For directories we hold some additional information in the fd.
          */
          */
-        if ((vnode_isnamedstream(ndp->ni_vp)) &&
-               (ndp->ni_vp->v_parent != NULLVP) &&
-               vnode_isshadow(ndp->ni_vp)) {
-                       vnode_recycle(ndp->ni_vp);
-        }      
-#endif
-       nameidone(ndp);
-       vnode_put(dvp);
+       if (vnode_vtype(vp) == VDIR) {
+               fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
+       } else {
+               fp->f_fglob->fg_vn_data = NULL;
+       }
+
         vnode_put(vp);
         vnode_put(vp);
-       return (error);
-}
  
  
-/*
- * Delete a name from the filesystem using POSIX semantics.
- */
-int
-unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
-{
-       struct nameidata nd;
-       vfs_context_t ctx = vfs_context_current();
-
-       NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
-       return unlink1(ctx, &nd, 0);
-}
-
-/*
- * Delete a name from the filesystem using Carbon semantics.
- */
-int
-delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
-{
-       struct nameidata nd;
-       vfs_context_t ctx = vfs_context_current();
-
-       NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
-       return unlink1(ctx, &nd, 1);
-}
+       proc_fdlock(p);
+       if (flags & O_CLOEXEC)
+               *fdflags(p, indx) |= UF_EXCLOSE;
+       if (flags & O_CLOFORK)
+               *fdflags(p, indx) |= UF_FORKCLOSE;
+       procfdtbl_releasefd(p, indx, NULL);
+       fp_drop(p, indx, fp, 1);
+       proc_fdunlock(p);
  
  
-/*
- * Reposition read/write file offset.
- */
-int
-lseek(proc_t p, struct lseek_args *uap, off_t *retval)
-{
-       struct fileproc *fp;
-       vnode_t vp;
-       struct vfs_context *ctx;
-       off_t offset = uap->offset, file_size;
-       int error;
+       *retval = indx;
  
  
-       if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
-               if (error == ENOTSUP)
-                       return (ESPIPE);
-               return (error);
-       }
-       if (vnode_isfifo(vp)) {
-               file_drop(uap->fd);
-               return(ESPIPE);
+       if (sessp != SESSION_NULL)
+               session_rele(sessp);
+       return (0);
+bad:
+       if (deny_controlling_tty) {
+               session_lock(sessp);
+               sessp->s_flags &= ~S_NOCTTY;
+               session_unlock(sessp);
         }
         }
+       if (sessp != SESSION_NULL)
+               session_rele(sessp);
  
  
-
-       ctx = vfs_context_current();
-#if CONFIG_MACF
-       if (uap->whence == L_INCR && uap->offset == 0)
-               error = mac_file_check_get_offset(vfs_context_ucred(ctx),
-                   fp->f_fglob);
-       else
-               error = mac_file_check_change_offset(vfs_context_ucred(ctx),
-                   fp->f_fglob);
-       if (error) {
-               file_drop(uap->fd);
-               return (error);
-       }
-#endif
-       if ( (error = vnode_getwithref(vp)) ) {
-               file_drop(uap->fd);
-               return(error);
+       struct vfs_context context = *vfs_context_current();
+       context.vc_ucred = fp->f_fglob->fg_cred;
+    
+       if ((fp->f_fglob->fg_flag & FHASLOCK) &&
+           (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
+               lf.l_whence = SEEK_SET;
+               lf.l_start = 0;
+               lf.l_len = 0;
+               lf.l_type = F_UNLCK;
+        
+               (void)VNOP_ADVLOCK(
+                       vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
         }
  
         }
  
-       switch (uap->whence) {
-       case L_INCR:
-               offset += fp->f_fglob->fg_offset;
-               break;
-       case L_XTND:
-               if ((error = vnode_size(vp, &file_size, ctx)) != 0)
-                       break;
-               offset += file_size;
-               break;
-       case L_SET:
-               break;
-       default:
-               error = EINVAL;
-       }
-       if (error == 0) {
-               if (uap->offset > 0 && offset < 0) {
-                       /* Incremented/relative move past max size */
-                       error = EOVERFLOW;
-               } else {
-                       /*
-                        * Allow negative offsets on character devices, per
-                        * POSIX 1003.1-2001.  Most likely for writing disk
-                        * labels.
-                        */
-                       if (offset < 0 && vp->v_type != VCHR) {
-                               /* Decremented/relative move before start */
-                               error = EINVAL;
-                       } else {
-                               /* Success */
-                               fp->f_fglob->fg_offset = offset;
-                               *retval = fp->f_fglob->fg_offset;
-                       }
-               }
-       }
+       vn_close(vp, fp->f_fglob->fg_flag, &context);
+       vnode_put(vp);
+       fp_free(p, indx, fp);
  
  
-       /* 
-        * An lseek can affect whether data is "available to read."  Use
-        * hint of NOTE_NONE so no EVFILT_VNODE events fire
-        */
-       post_event_if_success(vp, error, NOTE_NONE);
-       (void)vnode_put(vp);
-       file_drop(uap->fd);
         return (error);
  }
  
         return (error);
  }
  
-
  /*
  /*
- * Check access permissions.
- *
- * Returns:    0                       Success
- *             vnode_authorize:???
+ * While most of the *at syscall handlers can call nameiat() which
+ * is a wrapper around namei, the use of namei and initialisation
+ * of nameidata are far removed and in different functions  - namei
+ * gets called in vn_open_auth for open1. So we'll just do here what
+ * nameiat() does.
   */
  static int
   */
  static int
-access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
+open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
+    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
+    int dirfd)
  {
  {
-       kauth_action_t action;
-       int error;
+       if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
+               int error;
+               char c;
  
  
-       /*
-        * If just the regular access bits, convert them to something
-        * that vnode_authorize will understand.
-        */
-       if (!(uflags & _ACCESS_EXTENDED_MASK)) {
-               action = 0;
-               if (uflags & R_OK)
-                       action |= KAUTH_VNODE_READ_DATA;        /* aka KAUTH_VNODE_LIST_DIRECTORY */
-               if (uflags & W_OK) {
-                       if (vnode_isdir(vp)) {
-                               action |= KAUTH_VNODE_ADD_FILE |
-                                   KAUTH_VNODE_ADD_SUBDIRECTORY;
-                               /* might want delete rights here too */
-                       } else {
-                               action |= KAUTH_VNODE_WRITE_DATA;
-                       }
+               if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
+                       error = copyin(ndp->ni_dirp, &c, sizeof(char));
+                       if (error)
+                               return (error);
+               } else {
+                       c = *((char *)(ndp->ni_dirp));
                 }
                 }
-               if (uflags & X_OK) {
-                       if (vnode_isdir(vp)) {
-                               action |= KAUTH_VNODE_SEARCH;
-                       } else {
-                               action |= KAUTH_VNODE_EXECUTE;
+
+               if (c != '/') {
+                       vnode_t dvp_at;
+
+                       error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
+                           &dvp_at);
+                       if (error)
+                               return (error);
+
+                       if (vnode_vtype(dvp_at) != VDIR) {
+                               vnode_put(dvp_at);
+                               return (ENOTDIR);
                         }
                         }
-               }
-       } else {
-               /* take advantage of definition of uflags */
-               action = uflags >> 8;
-       }
-       
-#if CONFIG_MACF
-       error = mac_vnode_check_access(ctx, vp, uflags);
-       if (error)
-               return (error);
-#endif /* MAC */
  
  
-       /* action == 0 means only check for existence */
-       if (action != 0) {
-               error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
-       } else {
-               error = 0;
+                       ndp->ni_dvp = dvp_at;
+                       ndp->ni_cnd.cn_flags |= USEDVP;
+                       error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
+                           retval);
+                       vnode_put(dvp_at);
+                       return (error);
+               }
         }
  
         }
  
-       return(error);
+       return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
  }
  
  }
  
-
-
  /*
  /*
- * access_extended: Check access permissions in bulk.
- *
- * Description:        uap->entries            Pointer to an array of accessx
- *                                     descriptor structs, plus one or 
- *                                     more NULL terminated strings (see 
- *                                     "Notes" section below).
- *             uap->size               Size of the area pointed to by
- *                                     uap->entries.
- *             uap->results            Pointer to the results array.
- *
- * Returns:    0                       Success
- *             ENOMEM                  Insufficient memory
- *             EINVAL                  Invalid arguments
- *             namei:EFAULT            Bad address
- *             namei:ENAMETOOLONG      Filename too long
- *             namei:ENOENT            No such file or directory
- *             namei:ELOOP             Too many levels of symbolic links
- *             namei:EBADF             Bad file descriptor
- *             namei:ENOTDIR           Not a directory
- *             namei:???
- *             access1:
- *
- * Implicit returns:
- *             uap->results            Array contents modified
+ * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
   *
   *
- * Notes:      The uap->entries are structured as an arbitrary length array
- *             of accessx descriptors, followed by one or more NULL terminated
- *             strings
+ * Parameters: p                       Process requesting the open
+ *             uap                     User argument descriptor (see below)
+ *             retval                  Pointer to an area to receive the
+ *                                     return value from the system call
   *
   *
- *                     struct accessx_descriptor[0]
- *                     ...
- *                     struct accessx_descriptor[n]
- *                     char name_data[0];
+ * Indirect:   uap->path               Path to open (same as 'open')
+ *             uap->flags              Flags to open (same as 'open')
+ *             uap->uid                UID to set, if creating
+ *             uap->gid                GID to set, if creating
+ *             uap->mode               File mode, if creating (same as 'open')
+ *             uap->xsecurity          ACL to set, if creating
   *
   *
- *             We determine the entry count by walking the buffer containing
- *             the uap->entries argument descriptor.  For each descriptor we
- *             see, the valid values for the offset ad_name_offset will be
- *             in the byte range:
+ * Returns:    0                       Success
+ *             !0                      errno value
   *
   *
- *                     [ uap->entries + sizeof(struct accessx_descriptor) ]
- *                                             to
- *                             [ uap->entries + uap->size - 2 ]
+ * Notes:      The kauth_filesec_t in 'va', if any, is in host byte order.
   *
   *
- *             since we must have at least one string, and the string must
- *             be at least one character plus the NULL terminator in length.
- *             
- * XXX:                Need to support the check-as uid argument
+ * XXX:                We should enumerate the possible errno values here, and where
+ *             in the code they originated.
   */
  int
   */
  int
-access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
+open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
  {
  {
-       struct accessx_descriptor *input = NULL;
-       errno_t *result = NULL;
-       errno_t error = 0;
-       int wantdelete = 0;
-       unsigned int desc_max, desc_actual, i, j;
-       struct vfs_context context;
+       struct filedesc *fdp = p->p_fd;
+       int ciferror;
+       kauth_filesec_t xsecdst;
+       struct vnode_attr va;
         struct nameidata nd;
         struct nameidata nd;
-       int niopts;
-       vnode_t vp = NULL;
-       vnode_t dvp = NULL;
-#define ACCESSX_MAX_DESCR_ON_STACK 10
-       struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
+       int cmode;
  
  
-       context.vc_ucred = NULL;
+       AUDIT_ARG(owner, uap->uid, uap->gid);
+
+       xsecdst = NULL;
+       if ((uap->xsecurity != USER_ADDR_NULL) &&
+           ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
+               return ciferror;
+
+       VATTR_INIT(&va);
+       cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
+       VATTR_SET(&va, va_mode, cmode);
+       if (uap->uid != KAUTH_UID_NONE)
+               VATTR_SET(&va, va_uid, uap->uid);
+       if (uap->gid != KAUTH_GID_NONE)
+               VATTR_SET(&va, va_gid, uap->gid);
+       if (xsecdst != NULL)
+               VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+
+       NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+              uap->path, vfs_context_current());
+
+       ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
+                        fileproc_alloc_init, NULL, retval);
+       if (xsecdst != NULL)
+               kauth_filesec_free(xsecdst);
+
+       return ciferror;
+}
+
+/*
+ * open_dprotected_np: data-protected variant of open(2).
+ *
+ * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
+ *
+ * The requested mode is masked with fd_cmask and reduced to the regular
+ * access permission bits.  When O_CREAT is present in the open flags the
+ * requested protection class is recorded in the vnode attributes.  When
+ * O_DP_GETRAWENCRYPTED is present in dpflags the raw-encrypted attribute
+ * flag is set, unless the open flags contain O_RDWR or O_WRONLY, in which
+ * case EINVAL is returned.  The open itself is performed by open1().
+ */
+int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
+       struct vnode_attr attrs;
+       struct nameidata lookup;
+       struct filedesc *fdp = p->p_fd;
+       int oflags = uap->flags;
+       int pclass = uap->class;
+       int dp_opts = uap->dpflags;
+       int create_mode;
+
+       /* Same attribute and lookup setup as the normal open(2) path. */
+       VATTR_INIT(&attrs);
+       create_mode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
+       VATTR_SET(&attrs, va_mode, create_mode & ACCESSPERMS);
+
+       NDINIT(&lookup, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+           uap->path, vfs_context_current());
+
+       /* Extra field 1: the target cprotect class, only when creating. */
+       if (oflags & O_CREAT) {
+               VATTR_SET(&attrs, va_dataprotect_class, pclass);
+       }
+
+       /* Extra field 2: request open-raw-encrypted semantics. */
+       if (dp_opts & O_DP_GETRAWENCRYPTED) {
+               /* Not allowed to write raw encrypted bytes */
+               if (oflags & (O_RDWR | O_WRONLY)) {
+                       return EINVAL;
+               }
+               VATTR_SET(&attrs, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
+       }
+
+       return open1(vfs_context_current(), &lookup, uap->flags, &attrs,
+           fileproc_alloc_init, NULL, retval);
+}
+
+/*
+ * Shared helper behind open_nocancel(), openat_nocancel() and
+ * openbyid_np(): prepare the create attributes and the lookup state,
+ * then defer to open1at().
+ *
+ *     ctx     context for the lookup; its process supplies fd_cmask
+ *     path    path to open, interpreted according to segflg
+ *     flags   open flags, passed through unchanged
+ *     mode    requested mode; masked with fd_cmask, ALLPERMS, ~S_ISTXT
+ *             and ACCESSPERMS before it is stored in va_mode
+ *     fd      directory descriptor (or AT_FDCWD) handed to open1at()
+ *     segflg  address space of 'path'
+ *     retval  passed through to open1at()
+ *
+ * Returns whatever open1at() returns.
+ */
+static int
+openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
+    int fd, enum uio_seg segflg, int *retval)
+{
+       proc_t owner = vfs_context_proc(ctx);
+       struct filedesc *fdesc = owner->p_fd;
+       struct vnode_attr attrs;
+       struct nameidata lookup;
+       int create_mode;
+
+       VATTR_INIT(&attrs);
+
+       /* Apply the creation mask, then keep only the access bits. */
+       create_mode = ((mode &~ fdesc->fd_cmask) & ALLPERMS) & ~S_ISTXT;
+       VATTR_SET(&attrs, va_mode, create_mode & ACCESSPERMS);
+
+       NDINIT(&lookup, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
+           segflg, path, ctx);
+
+       return (open1at(ctx, &lookup, flags, &attrs, fileproc_alloc_init,
+           NULL, retval, fd));
+}
+
+/*
+ * open: run __pthread_testcancel(1), then forward the unchanged
+ * arguments to open_nocancel().
+ */
+int
+open(proc_t p, struct open_args *uap, int32_t *retval)
+{
+       struct open_nocancel_args *nc_uap = (struct open_nocancel_args *)uap;
+
+       __pthread_testcancel(1);
+       return (open_nocancel(p, nc_uap, retval));
+}
+
+/*
+ * open_nocancel: open uap->path (a user-space address) relative to
+ * AT_FDCWD by way of openat_internal().
+ */
+int
+open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
+    int32_t *retval)
+{
+       vfs_context_t ctx = vfs_context_current();
+       int error;
+
+       error = openat_internal(ctx, uap->path, uap->flags, uap->mode,
+           AT_FDCWD, UIO_USERSPACE, retval);
+       return (error);
+}
+
+/*
+ * openat_nocancel: open uap->path (a user-space address) relative to
+ * the descriptor uap->fd by way of openat_internal().
+ */
+int
+openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
+                int32_t *retval)
+{
+       vfs_context_t ctx = vfs_context_current();
+       int error;
+
+       error = openat_internal(ctx, uap->path, uap->flags, uap->mode,
+           uap->fd, UIO_USERSPACE, retval);
+       return (error);
+}
+
+/*
+ * openat: run __pthread_testcancel(1), then forward the unchanged
+ * arguments to openat_nocancel().
+ */
+int
+openat(proc_t p, struct openat_args *uap, int32_t *retval)
+{
+       struct openat_nocancel_args *nc_uap = (struct openat_nocancel_args *)uap;
+
+       __pthread_testcancel(1);
+       return (openat_nocancel(p, nc_uap, retval));
+}
+
+/*
+ * openbyid_np: open a file given a file system id and a file system object id.
+ *     The hfs file system object id is an fsobj_id_t {uint32, uint32};
+ *     for file systems that don't support object ids it is a node id
+ *     (uint64_t).  Either way it is copied in here as one uint64_t.
+ *
+ *     The (fsid, objid) pair is first resolved to a path with
+ *     fsgetpath_internal(); that kernel-space path is then opened through
+ *     openat_internal() relative to AT_FDCWD with a mode of 0.
+ *
+ * Parameters: p                       Process requesting the open (unused)
+ *             uap                     User argument descriptor (see below)
+ *             retval                  Pointer to an area to receive the
+ *                                     return value from the system call
+ *
+ * Indirect:   uap->fsid               User address of the id of the target
+ *                                     file system
+ *             uap->objid              User address of the id of the target
+ *                                     file system object
+ *             uap->oflags             Flags to open (same as 'open')
+ *
+ * Returns:    0                       Success
+ *             ENOMEM                  Path buffer could not be allocated
+ *             copyin:???
+ *             fsgetpath_internal:???
+ *             openat_internal:???
+ *
+ *
+ * XXX:                We should enumerate the possible errno values here, and where
+ *             in the code they originated.
+ */
+int
+openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
+{
+       fsid_t fsid;
+       uint64_t objid;
+       int error;
+       char *buf = NULL;
+       int buflen = MAXPATHLEN;
+       int pathlen = 0;
+       vfs_context_t ctx = vfs_context_current();
+
+       if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
+               return (error);
+       }
+
+       /*
+        * uap->objid points at an fsobj_id_t, defined as
+        * struct {uint32_t, uint32_t}; it is read as a single uint64_t.
+        */
+       if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
+               return (error);
+       }
+
+       AUDIT_ARG(value32, fsid.val[0]);
+       AUDIT_ARG(value64, objid);
+
+       /*
+        * Resolve the path from fsid, objid.
+        *
+        * Each pass allocates buflen + 1 bytes.  On any error the buffer is
+        * freed; if the error was ENOSPC (presumably "buffer too small" --
+        * confirm against fsgetpath_internal) the loop condition grows
+        * buflen by MAXPATHLEN and tries again.  Note that the growth
+        * happens as a side effect inside the while condition.
+        */
+       do {
+               MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
+               if (buf == NULL) {
+                       return (ENOMEM);
+               }
+
+               error = fsgetpath_internal(
+                       ctx, fsid.val[0], objid,
+                       buflen, buf, &pathlen);
+
+               if (error) {
+                       FREE(buf, M_TEMP);
+                       buf = NULL;
+               }
+       } while (error == ENOSPC && (buflen += MAXPATHLEN));
+
+       if (error) {
+               return error;   /* buf was already freed inside the loop */
+       }
+
+       buf[pathlen] = 0;       /* terminate at the length reported by fsgetpath_internal() */
+
+       error = openat_internal(
+               ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
+
+       FREE(buf, M_TEMP);
+
+       return error;
+}
+
+
+/*
+ * Create a special file.
+ *
+ * mknod: create the node named by uap->path with the type encoded in
+ *     (uap->mode & S_IFMT) and the device number uap->dev.
+ *
+ *     The permission bits are (uap->mode & ALLPERMS) with the bits in
+ *     p->p_fd->fd_cmask cleared.  A request for S_IFIFO is handed to
+ *     mkfifo1() straight away, before the suser() check and before the
+ *     audit arguments are recorded.  For every other type suser() must
+ *     succeed (presumably a superuser check -- confirm).
+ *
+ *     Accepted types are S_IFCHR (VCHR), S_IFBLK (VBLK) and the value
+ *     S_IFMT itself (VBAD); anything else yields EINVAL.
+ *
+ * Returns:    0                       Success
+ *             EEXIST                  The name already exists
+ *             EINVAL                  Unsupported type in uap->mode
+ *     suser:???
+ *     namei:???
+ *     mac_vnode_check_create:???      (CONFIG_MACF only)
+ *     vnode_authorize:???
+ *     vn_create:???
+ *     mkfifo1:???
+ *
+ * mkfifo1() is declared here because mknod() calls it ahead of its
+ * definition further down.
+ */
+static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
+
+int
+mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
+{
+       struct vnode_attr va;
+       vfs_context_t ctx = vfs_context_current();
+       int error;
+       struct nameidata nd;
+       vnode_t vp, dvp;
+
+       VATTR_INIT(&va);
+       VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
+       VATTR_SET(&va, va_rdev, uap->dev);
+
+       /*
+        * If it's a mknod() of a FIFO, call mkfifo1() instead; va already
+        * carries the masked mode (and va_rdev) set above.
+        */
+       if ((uap->mode & S_IFMT) == S_IFIFO)
+               return(mkfifo1(ctx, uap->path, &va));
+
+       AUDIT_ARG(mode, uap->mode);
+       AUDIT_ARG(value32, uap->dev);
+
+       if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
+               return (error);
+       NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1, 
+               UIO_USERSPACE, uap->path, ctx);
+       error = namei(&nd);
+       if (error)
+               return (error);
+       dvp = nd.ni_dvp;        /* parent directory of the new node */
+       vp = nd.ni_vp;          /* non-NULL when the name already exists */
+
+       if (vp != NULL) {
+               error = EEXIST;
+               goto out;
+       }
+
+       switch (uap->mode & S_IFMT) {
+       case S_IFMT:    /* used by badsect to flag bad sectors */
+               VATTR_SET(&va, va_type, VBAD);
+               break;
+       case S_IFCHR:
+               VATTR_SET(&va, va_type, VCHR);
+               break;
+       case S_IFBLK:
+               VATTR_SET(&va, va_type, VBLK);
+               break;
+       default:
+               error = EINVAL;
+               goto out;
+       }
+
+#if CONFIG_MACF
+       error = mac_vnode_check_create(ctx,
+           nd.ni_dvp, &nd.ni_cnd, &va);
+       if (error)
+               goto out;
+#endif
+
+       if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+               goto out;
+
+       if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
+               goto out;
+
+       if (vp) {
+               int     update_flags = 0;
+
+               // Make sure the name & parent pointers are hooked up; only the missing ones are updated
+               if (vp->v_name == NULL)
+                       update_flags |= VNODE_UPDATE_NAME;
+               if (vp->v_parent == NULLVP)
+                       update_flags |= VNODE_UPDATE_PARENT;
+
+               if (update_flags)
+                       vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
+
+#if CONFIG_FSE
+               add_fsevent(FSE_CREATE_FILE, ctx,
+                   FSE_ARG_VNODE, vp,
+                   FSE_ARG_DONE);
+#endif
+       }
+
+out:
+       /*
+        * nameidone has to happen before we vnode_put(dvp)
+        * since it may need to release the fs_nodelock on the dvp
+        *
+        * This label is reached on success and on every failure after
+        * namei() succeeded; vp is released only when it is non-NULL.
+        */
+       nameidone(&nd);
+
+       if (vp)
+               vnode_put(vp);
+       vnode_put(dvp);
+
+       return (error);
+}
+
+/*
+ * mkfifo1: create a named pipe at 'upath' (a user-space path) using the
+ * attributes in 'vap'; va_type is forced to VFIFO here.
+ *
+ * Returns:    0                       Success
+ *             EEXIST                  The name already exists
+ *     namei:???
+ *     vn_authorize_create:???
+ *     vn_create:???
+ */
+static int
+mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
+{
+       struct nameidata lookup;
+       vnode_t fifo_vp, parent_vp;
+       int error;
+
+       NDINIT(&lookup, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
+           UIO_USERSPACE, upath, ctx);
+       if ((error = namei(&lookup)) != 0)
+               return (error);
+
+       parent_vp = lookup.ni_dvp;
+       fifo_vp = lookup.ni_vp;
+
+       if (fifo_vp != NULL) {
+               /* the name is already taken */
+               error = EEXIST;
+       } else {
+               /* new name: authorize the addition, then create the fifo */
+               VATTR_SET(vap, va_type, VFIFO);
+
+               error = vn_authorize_create(parent_vp, &lookup.ni_cnd, vap,
+                   ctx, NULL);
+               if (error == 0)
+                       error = vn_create(parent_vp, &fifo_vp, &lookup, vap,
+                           0, 0, NULL, ctx);
+       }
+
+       /*
+        * nameidone has to happen before we vnode_put() the parent
+        * since it may need to release the fs_nodelock on it
+        */
+       nameidone(&lookup);
+
+       if (fifo_vp)
+               vnode_put(fifo_vp);
+       vnode_put(parent_vp);
+
+       return error;
+}
+
+
+/*
+ * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
+ *
+ * Parameters: p                       Process requesting the open
+ *             uap                     User argument descriptor (see below)
+ *             retval                  (Ignored)
+ *
+ * Indirect:   uap->path               Path to fifo (same as 'mkfifo')
+ *             uap->uid                UID to set
+ *             uap->gid                GID to set
+ *             uap->mode               File mode to set (same as 'mkfifo')
+ *             uap->xsecurity          ACL to set, if creating
+ *
+ * Returns:    0                       Success
+ *             !0                      errno value
+ *
+ * Notes:      The kauth_filesec_t in 'va', if any, is in host byte order.
+ *
+ * XXX:                We should enumerate the possible errno values here, and where
+ *             in the code they originated.
+ */
+int
+mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
+{
+       struct vnode_attr va;
+       kauth_filesec_t fsec = KAUTH_FILESEC_NONE;
+       int error;
+
+       AUDIT_ARG(owner, uap->uid, uap->gid);
+
+       /* pull in the caller's extended security information, if supplied */
+       if (uap->xsecurity != USER_ADDR_NULL) {
+               error = kauth_copyinfilesec(uap->xsecurity, &fsec);
+               if (error != 0)
+                       return (error);
+       }
+
+       /* requested mode filtered through the process umask */
+       VATTR_INIT(&va);
+       VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
+
+       /* owner, group and ACL are only set when the caller asked for them */
+       if (uap->uid != KAUTH_UID_NONE) {
+               VATTR_SET(&va, va_uid, uap->uid);
+       }
+       if (uap->gid != KAUTH_GID_NONE) {
+               VATTR_SET(&va, va_gid, uap->gid);
+       }
+       if (fsec != KAUTH_FILESEC_NONE) {
+               VATTR_SET(&va, va_acl, &fsec->fsec_acl);
+       }
+
+       error = mkfifo1(vfs_context_current(), uap->path, &va);
+
+       /* the copied-in filesec is ours to free, whatever mkfifo1 returned */
+       if (fsec != KAUTH_FILESEC_NONE)
+               kauth_filesec_free(fsec);
+
+       return (error);
+}
+
+/*
+ * mkfifo: Create a named pipe at uap->path with mode uap->mode, filtered
+ * through the calling process' umask.  The work is done by mkfifo1().
+ */
+/* ARGSUSED */
+int
+mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
+{
+       struct vnode_attr va;
+       int error;
+
+       VATTR_INIT(&va);
+       VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
+
+       error = mkfifo1(vfs_context_current(), uap->path, &va);
+       return (error);
+}
+
+
+/*
+ * my_strrchr: Return a pointer to the last occurrence of 'ch' in the
+ * NUL-terminated string 'p', or NULL if it does not occur.  The
+ * terminator itself takes part in the comparison.
+ */
+static char *
+my_strrchr(char *p, int ch)
+{
+       char *last = NULL;
+
+       do {
+               if (*p == ch)
+                       last = p;
+       } while (*p++ != '\0');
+
+       return (last);
+}
+
+extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
+
+/*
+ * safe_getpath: Build the path of 'dvp', optionally followed by
+ *             "/leafname", in 'path', falling back to a shorter path
+ *             when the complete one cannot be produced.
+ *
+ * Parameters: dvp                     Directory vnode to get the path of
+ *             leafname                Optional last component to append
+ *             path                    Output buffer
+ *             _len                    Size of 'path', in bytes
+ *             truncated_path          Set to 1 when 'path' is not the
+ *                                     complete path asked for, else 0
+ *
+ * Returns:    Length of the string left in 'path', counting the
+ *             terminating NUL.
+ *
+ * Notes:      All bounds are taken from '_len' rather than assuming the
+ *             buffer is MAXPATHLEN bytes long.
+ */
+int
+safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
+{
+       int ret, len = _len;
+
+       *truncated_path = 0;
+       ret = vn_getpath(dvp, path, &len);
+       if (ret == 0 && len < (_len - 1)) {
+               if (leafname) {
+                       /* turn the NUL into a separator and append the leaf */
+                       path[len-1] = '/';
+                       len += strlcpy(&path[len], leafname, _len-len) + 1;
+                       if (len > _len) {
+                               char *ptr;
+
+                               // the string got truncated!
+                               *truncated_path = 1;
+                               ptr = my_strrchr(path, '/');
+                               if (ptr) {
+                                       *ptr = '\0';   // chop off the string at the last directory component
+                               }
+                               len = strlen(path) + 1;
+                       }
+               }
+       } else if (ret == 0) {
+               /* the directory path alone leaves no room for a leaf name */
+               *truncated_path = 1;
+       } else {
+               struct vnode *mydvp=dvp;
+
+               if (ret != ENOSPC) {
+                       printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
+                              dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
+               }
+               *truncated_path = 1;
+
+               /*
+                * Walk up the v_parent chain looking for an ancestor whose
+                * path can be produced.
+                *
+                * NOTE(review): if vn_getpath fails here with something other
+                * than ENOSPC the loop ends with 'path' and 'len' as that
+                * call left them -- confirm that is acceptable to callers.
+                */
+               do {
+                       if (mydvp->v_parent != NULL) {
+                               mydvp = mydvp->v_parent;
+                       } else if (mydvp->v_mount) {
+                               strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
+                               /* report the length of what is now in the buffer */
+                               len = strlen(path) + 1;
+                               break;
+                       } else {
+                               // no parent and no mount point?  only thing is to punt and say "/" changed
+                               strlcpy(path, "/", _len);
+                               len = 2;
+                               mydvp = NULL;
+                       }
+
+                       if (mydvp == NULL) {
+                               break;
+                       }
+
+                       len = _len;
+                       ret = vn_getpath(mydvp, path, &len);
+               } while (ret == ENOSPC);
+       }
+
+       return len;
+}
+
+
+/*
+ * Make a hard file link.
+ *
+ * Looks up the existing object named by (fd1, path), then looks up the
+ * new name (fd2, link) for creation, performs the MAC and kauth checks,
+ * and calls VNOP_LINK.  After a successful link, kauth fileop listeners
+ * and fsevents watchers are notified when any are present.
+ *
+ * Parameters: ctx                     Context of the caller
+ *             fd1, path               Existing object to link to
+ *             fd2, link               Name of the link to create
+ *             flag                    AT_SYMLINK_FOLLOW to follow a symlink
+ *                                     at 'path'
+ *             segflg                  Address space of 'path' and 'link'
+ *
+ * Returns:    0                       Success
+ *             EPERM
+ *             EACCES
+ *             EEXIST
+ *             EXDEV
+ *             ENOMEM
+ *     namei:???
+ *     vnode_authorize:???
+ *     VNOP_LINK:???
+ */
+/* ARGSUSED */
+static int
+linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
+    user_addr_t link, int flag, enum uio_seg segflg)
+{
+       vnode_t vp, dvp, lvp;
+       struct nameidata nd;
+       int follow;
+       int error;
+#if CONFIG_FSE
+       fse_info finfo;
+#endif
+       int need_event, has_listeners;
+       char *target_path = NULL;
+       int truncated=0;
+
+       /* vp: object being linked to; dvp: parent of the new name; lvp: existing target, if any */
+       vp = dvp = lvp = NULLVP;
+
+       /* look up the object we are linking to */
+       follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
+       NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
+           segflg, path, ctx);
+
+       error = nameiat(&nd, fd1);
+       if (error)
+               return (error);
+       vp = nd.ni_vp;
+
+       nameidone(&nd);
+
+       /*
+        * Normally, linking to directories is not supported.
+        * However, some file systems may have limited support.
+        */
+       if (vp->v_type == VDIR) {
+               if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
+                       error = EPERM;   /* POSIX */
+                       goto out;
+               }
+               /* Linking to a directory requires ownership. */
+               if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
+                       struct vnode_attr dva;
+
+                       VATTR_INIT(&dva);
+                       VATTR_WANTED(&dva, va_uid);
+                       if (vnode_getattr(vp, &dva, ctx) != 0 ||
+                           !VATTR_IS_SUPPORTED(&dva, va_uid) ||
+                           (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
+                               error = EACCES;
+                               goto out;
+                       }
+               }
+       }
+
+       /*
+        * lookup the target node; the nameidata from the first lookup is
+        * reused with its operation, flags and path replaced
+        */
+#if CONFIG_TRIGGERS
+       nd.ni_op = OP_LINK;
+#endif
+       nd.ni_cnd.cn_nameiop = CREATE;
+       nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
+       nd.ni_dirp = link;
+       error = nameiat(&nd, fd2);
+       if (error != 0)
+               goto out;
+       dvp = nd.ni_dvp;
+       lvp = nd.ni_vp;
+
+#if CONFIG_MACF
+       if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
+               goto out2;
+#endif
+
+       /* or to anything that kauth doesn't want us to (eg. immutable items) */
+       if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
+               goto out2;
+
+       /* target node must not exist */
+       if (lvp != NULLVP) {
+               error = EEXIST;
+               goto out2;
+       }
+       /* cannot link across mountpoints */
+       if (vnode_mount(vp) != vnode_mount(dvp)) {
+               error = EXDEV;
+               goto out2;
+       }
+
+       /* authorize creation of the target node */
+       if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+               goto out2;
+
+       /* and finally make the link */
+       error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
+       if (error)
+               goto out2;
+
+#if CONFIG_MACF
+       (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
+#endif
+
+       /* the link exists from here on; what follows is notification only */
+#if CONFIG_FSE
+       need_event = need_fsevent(FSE_CREATE_FILE, dvp);
+#else
+       need_event = 0;
+#endif
+       has_listeners = kauth_authorize_fileop_has_listeners();
+
+       if (need_event || has_listeners) {
+               char *link_to_path = NULL;
+               int len, link_name_len;
+
+               /* build the path to the new link file */
+               GET_PATH(target_path);
+               if (target_path == NULL) {
+                       /* NOTE(review): ENOMEM is returned although VNOP_LINK already succeeded */
+                       error = ENOMEM;
+                       goto out2;
+               }
+
+               len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
+
+               if (has_listeners) {
+                       /* build the path to file we are linking to */
+                       GET_PATH(link_to_path);
+                       if (link_to_path == NULL) {
+                               error = ENOMEM;
+                               goto out2;
+                       }
+
+                       link_name_len = MAXPATHLEN;
+                       if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
+                               /*
+                                * Call out to allow 3rd party notification of the link.
+                                * Ignore result of kauth_authorize_fileop call.
+                                */
+                               kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK, 
+                                                      (uintptr_t)link_to_path, 
+                                                      (uintptr_t)target_path);
+                       }
+                       if (link_to_path != NULL) {
+                               RELEASE_PATH(link_to_path);
+                       }
+               }
+#if CONFIG_FSE
+               if (need_event) {
+                       /* construct fsevent */
+                       if (get_fse_info(vp, &finfo, ctx) == 0) {
+                               if (truncated) {
+                                       finfo.mode |= FSE_TRUNCATED_PATH;
+                               }
+
+                               // build the path to the destination of the link
+                               add_fsevent(FSE_CREATE_FILE, ctx,
+                                           FSE_ARG_STRING, len, target_path,
+                                           FSE_ARG_FINFO, &finfo,
+                                           FSE_ARG_DONE);
+                       }
+                       /* the parent of the linked-to object gets a stat-changed event */
+                       if (vp->v_parent) {
+                           add_fsevent(FSE_STAT_CHANGED, ctx,
+                               FSE_ARG_VNODE, vp->v_parent,
+                               FSE_ARG_DONE);
+                       }
+               }
+#endif
+       }
+out2:
+       /*
+        * nameidone has to happen before we vnode_put(dvp)
+        * since it may need to release the fs_nodelock on the dvp
+        */
+       nameidone(&nd);
+       if (target_path != NULL) {
+               RELEASE_PATH(target_path);
+       }
+out:
+       /* lvp and dvp are only set once the second lookup has succeeded */
+       if (lvp)
+               vnode_put(lvp);
+       if (dvp)
+               vnode_put(dvp);
+       vnode_put(vp);
+       return (error);
+}
+
+/*
+ * link: Hard-link uap->link to uap->path.  Both paths are looked up with
+ * AT_FDCWD, and a symbolic link at uap->path is followed.
+ */
+int
+link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
+{
+       int error;
+
+       error = linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
+           AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE);
+       return (error);
+}
+
+/*
+ * linkat: Hard-link (uap->fd2, uap->link) to (uap->fd1, uap->path).
+ * AT_SYMLINK_FOLLOW is the only flag accepted; anything else is EINVAL.
+ */
+int
+linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
+{
+       int error;
+
+       if ((uap->flag & ~AT_SYMLINK_FOLLOW) != 0) {
+               error = EINVAL;
+       } else {
+               error = linkat_internal(vfs_context_current(), uap->fd1,
+                   uap->path, uap->fd2, uap->link, uap->flag, UIO_USERSPACE);
+       }
+
+       return (error);
+}
+
+/*
+ * Make a symbolic link.
+ *
+ * Creates the symbolic link (fd, link) whose contents are the string at
+ * 'path_data'.  When 'segflg' denotes user space the string is first
+ * copied into a kernel buffer, which is freed again before returning.
+ *
+ * Parameters: ctx                     Context of the caller
+ *             path_data               The link contents
+ *             fd, link                Name of the symbolic link to create
+ *             segflg                  Address space of 'path_data' and 'link'
+ *
+ * Returns:    0                       Success
+ *             EEXIST                  'link' already exists
+ *             !0                      errno value from copyinstr, nameiat,
+ *                                     the MAC/kauth checks or the file system
+ *
+ * Notes:      A file system may complete VNOP_SYMLINK without handing back
+ *             a vnode; the new link is then looked up again before its
+ *             identity is updated, and every use of 'vp' ahead of that
+ *             lookup has to tolerate NULL.
+ *
+ * We could add support for ACLs here too...
+ */
+/* ARGSUSED */
+static int
+symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
+    user_addr_t link, enum uio_seg segflg)
+{
+       struct vnode_attr va;
+       char *path;
+       int error;
+       struct nameidata nd;
+       vnode_t vp, dvp;
+       uint32_t dfflags;       // Directory file flags
+       size_t dummy=0;
+       proc_t p;
+
+       error = 0;
+       if (UIO_SEG_IS_USER_SPACE(segflg)) {
+               MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+               error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
+       } else {
+               path = (char *)path_data;
+       }
+       if (error)
+               goto out;
+       AUDIT_ARG(text, path);  /* This is the link string */
+
+       NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
+            segflg, link, ctx);
+
+       error = nameiat(&nd, fd);
+       if (error)
+               goto out;
+       dvp = nd.ni_dvp;
+       vp = nd.ni_vp;
+
+       p = vfs_context_proc(ctx);
+       VATTR_INIT(&va);
+       VATTR_SET(&va, va_type, VLNK);
+       VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
+
+       /*
+        * Handle inheritance of restricted flag
+        */
+       error = vnode_flags(dvp, &dfflags, ctx);
+       if (error)
+               goto skipit;
+       if (dfflags & SF_RESTRICTED)
+               VATTR_SET(&va, va_flags, SF_RESTRICTED);
+
+#if CONFIG_MACF
+       error = mac_vnode_check_create(ctx,
+                       dvp, &nd.ni_cnd, &va);
+#endif
+       if (error != 0) {
+           goto skipit;
+       }
+
+       if (vp != NULL) {
+           error = EEXIST;
+           goto skipit;
+       }
+
+       /* authorize */
+       if (error == 0)
+               error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
+       /* get default ownership, etc. */
+       if (error == 0)
+               error = vnode_authattr_new(dvp, &va, 0, ctx);
+       if (error == 0)
+               error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
+
+       /*
+        * vp may still be NULL here (see the re-lookup below), so the
+        * labelling and the fallback attribute handling only run when the
+        * file system actually returned a vnode.
+        */
+#if CONFIG_MACF
+       if (error == 0 && vp)
+               error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
+#endif
+
+       /* do fallback attribute handling */
+       if (error == 0 && vp)
+               error = vnode_setattr_fallback(vp, &va, ctx);
+
+       if (error == 0) {
+               int     update_flags = 0;
+
+               /* no vnode came back from VNOP_SYMLINK: look the new link up */
+               if (vp == NULL) {
+                       nd.ni_cnd.cn_nameiop = LOOKUP;
+#if CONFIG_TRIGGERS
+                       nd.ni_op = OP_LOOKUP;
+#endif
+                       nd.ni_cnd.cn_flags = 0;
+                       error = nameiat(&nd, fd);
+                       vp = nd.ni_vp;
+
+                       if (vp == NULL)
+                               goto skipit;
+               }
+
+#if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
+               /* call out to allow 3rd party notification of rename.
+                * Ignore result of kauth_authorize_fileop call.
+                */
+               if (kauth_authorize_fileop_has_listeners() &&
+                   namei(&nd) == 0) {
+                       char *new_link_path = NULL;
+                       int             len;
+
+                       /* build the path to the new link file */
+                       new_link_path = get_pathbuff();
+                       len = MAXPATHLEN;
+                       vn_getpath(dvp, new_link_path, &len);
+                       if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
+                               new_link_path[len - 1] = '/';
+                               strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
+                       }
+
+                       kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
+                                          (uintptr_t)path, (uintptr_t)new_link_path);
+                       if (new_link_path != NULL)
+                               release_pathbuff(new_link_path);
+               }
+#endif
+               // Make sure the name & parent pointers are hooked up
+               if (vp->v_name == NULL)
+                       update_flags |= VNODE_UPDATE_NAME;
+               if (vp->v_parent == NULLVP)
+                       update_flags |= VNODE_UPDATE_PARENT;
+
+               if (update_flags)
+                       vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
+
+#if CONFIG_FSE
+               add_fsevent(FSE_CREATE_FILE, ctx,
+                           FSE_ARG_VNODE, vp,
+                           FSE_ARG_DONE);
+#endif
+       }
+
+skipit:
+       /*
+        * nameidone has to happen before we vnode_put(dvp)
+        * since it may need to release the fs_nodelock on the dvp
+        */
+       nameidone(&nd);
+
+       if (vp)
+               vnode_put(vp);
+       vnode_put(dvp);
+out:
+       /* only a buffer allocated above is freed, never the caller's string */
+       if (path && (path != (char *)path_data))
+               FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
+
+       return (error);
+}
+
+/*
+ * symlink: Create the symbolic link uap->link with contents uap->path.
+ * The link name is looked up with AT_FDCWD.
+ */
+int
+symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
+{
+       int error;
+
+       error = symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
+           uap->link, UIO_USERSPACE);
+       return (error);
+}
+
+/*
+ * symlinkat: Create the symbolic link (uap->fd, uap->path2) with contents
+ * uap->path1.
+ */
+int
+symlinkat(__unused proc_t p, struct symlinkat_args *uap,
+    __unused int32_t *retval)
+{
+       int error;
+
+       error = symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
+           uap->path2, UIO_USERSPACE);
+       return (error);
+}
+
+/*
+ * undelete: Used to delete a whiteout from the filesystem.
+ * The operation is no longer supported; every call fails with ENOTSUP.
+ */
+int
+undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
+{
+       return ENOTSUP;
+}
+
+/*
+ * Delete a name from the filesystem.
+ *
+ * Parameters: ctx                     Context of the caller
+ *             ndp                     nameidata already initialized by the
+ *                                     caller; LOCKPARENT and
+ *                                     NAMEI_COMPOUNDREMOVE are added here
+ *             unlink_flags            VNODE_REMOVE_NODELETEBUSY and/or
+ *                                     VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
+ *                                     other bits are ignored
+ *             fd                      Directory descriptor handed to nameiat
+ *
+ * Returns:    0                       Success
+ *             EBUSY                   The name is the root of a mounted
+ *                                     filesystem
+ *             EISDIR                  A continued lookup landed on a directory
+ *             ENOMEM                  No path buffer for notifications
+ *             !0                      errno value from nameiat, the
+ *                                     authorization checks or the file system
+ *
+ * Notes:      When the lookup returns no vnode, the parent must support
+ *             compound remove, and authorization is not done here
+ *             ("batched").  vn_remove may then return EKEEPLOOKING, in
+ *             which case the lookup is continued from lookup_continue.
+ */
+/* ARGSUSED */
+static int
+unlink1at(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags, int fd)
+{
+       vnode_t vp, dvp;
+       int error;
+       struct componentname *cnp;
+       char  *path = NULL;
+       int  len=0;
+#if CONFIG_FSE
+       fse_info  finfo;
+       struct vnode_attr va;
+#endif
+       int flags = 0;
+       int need_event = 0;
+       int has_listeners = 0;
+       int truncated_path=0;
+       int batched;
+       struct vnode_attr *vap = NULL;
+
+#if NAMEDRSRCFORK
+       /* unlink or delete is allowed on rsrc forks and named streams */
+       ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
+#endif
+
+       ndp->ni_cnd.cn_flags |= LOCKPARENT;
+       ndp->ni_flag |= NAMEI_COMPOUNDREMOVE;
+       cnp = &ndp->ni_cnd;
+
+lookup_continue:
+       error = nameiat(ndp, fd);
+       if (error) {
+               /*
+                * On a continued lookup the path buffer obtained during the
+                * previous pass is still held; release it rather than leak it.
+                */
+               if (path != NULL)
+                       RELEASE_PATH(path);
+               return (error);
+       }
+
+       dvp = ndp->ni_dvp;
+       vp = ndp->ni_vp;
+
+
+       /* With Carbon delete semantics, busy files cannot be deleted */
+       if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
+               flags |= VNODE_REMOVE_NODELETEBUSY;
+       }
+
+       /* Skip any potential upcalls if told to. */
+       if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
+               flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
+       }
+
+       if (vp) {
+               batched = vnode_compound_remove_available(vp);
+               /*
+                * The root of a mounted filesystem cannot be deleted.
+                * Bail out right away: falling through would let a later
+                * assignment to 'error' overwrite EBUSY.
+                */
+               if (vp->v_flag & VROOT) {
+                       error = EBUSY;
+                       goto out;
+               }
+
+               if (!batched) {
+                       error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
+                       if (error) {
+                               goto out;
+                       }
+               }
+       } else {
+               batched = 1;
+
+               if (!vnode_compound_remove_available(dvp)) {
+                       panic("No vp, but no compound remove?");
+               }
+       }
+
+#if CONFIG_FSE
+       need_event = need_fsevent(FSE_DELETE, dvp);
+       if (need_event) {
+               if (!batched) {
+                       if ((vp->v_flag & VISHARDLINK) == 0) {
+                               /* XXX need to get these data in batched VNOP */
+                               get_fse_info(vp, &finfo, ctx);
+                       }
+               } else {
+                       /* batched: have vn_remove collect the attributes */
+                       error = vfs_get_notify_attributes(&va);
+                       if (error) {
+                               goto out;
+                       }
+
+                       vap = &va;
+               }
+       }
+#endif
+       has_listeners = kauth_authorize_fileop_has_listeners();
+       if (need_event || has_listeners) {
+               /* the buffer is reused when we come back via lookup_continue */
+               if (path == NULL) {
+                       GET_PATH(path);
+                       if (path == NULL) {
+                               error = ENOMEM;
+                               goto out;
+                       }
+               }
+               len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
+       }
+
+#if NAMEDRSRCFORK
+       if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
+               error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
+       else
+#endif
+       {
+               error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx);
+               /* vn_remove may have filled in ni_vp */
+               vp = ndp->ni_vp;
+               if (error == EKEEPLOOKING) {
+                       if (!batched) {
+                               panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
+                       }
+
+                       if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+                               panic("EKEEPLOOKING, but continue flag not set?");
+                       }
+
+                       if (vnode_isdir(vp)) {
+                               error = EISDIR;
+                               goto out;
+                       }
+                       goto lookup_continue;
+               }
+       }
+
+       /*
+        * Call out to allow 3rd party notification of delete.
+        * Ignore result of kauth_authorize_fileop call.
+        */
+       if (!error) {
+               if (has_listeners) {
+                       kauth_authorize_fileop(vfs_context_ucred(ctx),
+                               KAUTH_FILEOP_DELETE,
+                               (uintptr_t)vp,
+                               (uintptr_t)path);
+               }
+
+               if (vp->v_flag & VISHARDLINK) {
+                   //
+                   // if a hardlink gets deleted we want to blow away the
+                   // v_parent link because the path that got us to this
+                   // instance of the link is no longer valid.  this will
+                   // force the next call to get the path to ask the file
+                   // system instead of just following the v_parent link.
+                   //
+                   vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
+               }
+
+#if CONFIG_FSE
+               if (need_event) {
+                       if (vp->v_flag & VISHARDLINK) {
+                               get_fse_info(vp, &finfo, ctx);
+                       } else if (vap) {
+                               vnode_get_fse_info_from_vap(vp, &finfo, vap);
+                       }
+                       if (truncated_path) {
+                               finfo.mode |= FSE_TRUNCATED_PATH;
+                       }
+                       add_fsevent(FSE_DELETE, ctx,
+                                               FSE_ARG_STRING, len, path,
+                                               FSE_ARG_FINFO, &finfo,
+                                               FSE_ARG_DONE);
+               }
+#endif
+       }
+
+out:
+       if (path != NULL)
+               RELEASE_PATH(path);
+
+#if NAMEDRSRCFORK
+       /* recycle the deleted rsrc fork vnode to force a reclaim, which
+        * will cause its shadow file to go away if necessary.
+        */
+        if (vp && (vnode_isnamedstream(vp)) &&
+               (vp->v_parent != NULLVP) &&
+               vnode_isshadow(vp)) {
+                       vnode_recycle(vp);
+        }
+#endif
+       /*
+        * nameidone has to happen before we vnode_put(dvp)
+        * since it may need to release the fs_nodelock on the dvp
+        */
+       nameidone(ndp);
+       vnode_put(dvp);
+       if (vp) {
+               vnode_put(vp);
+       }
+       return (error);
+}
+
+/*
+ * unlink1: unlink1at() with AT_FDCWD in place of a directory descriptor.
+ */
+int
+unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags)
+{
+       int error;
+
+       error = unlink1at(ctx, ndp, unlink_flags, AT_FDCWD);
+       return (error);
+}
+
+/*
+ * Delete a name from the filesystem using POSIX semantics.
+ *
+ * Sets up a DELETE lookup of (fd, path) and passes it to unlink1at()
+ * with no removal flags.
+ */
+static int
+unlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
+    enum uio_seg segflg)
+{
+       struct nameidata nd;
+       int error;
+
+       NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, segflg,
+              path, ctx);
+
+       error = unlink1at(ctx, &nd, 0, fd);
+       return (error);
+}
+
+/*
+ * unlink: Remove the name uap->path, looked up with AT_FDCWD.
+ */
+int
+unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
+{
+       int error;
+
+       error = unlinkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
+           UIO_USERSPACE);
+       return (error);
+}
+
+/*
+ * unlinkat: Remove the name (uap->fd, uap->path).  With AT_REMOVEDIR the
+ * request goes to rmdirat_internal(), otherwise to unlinkat_internal().
+ * Any other flag bit is EINVAL.
+ */
+int
+unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
+{
+       int error;
+
+       if ((uap->flag & ~AT_REMOVEDIR) != 0)
+               return (EINVAL);
+
+       if ((uap->flag & AT_REMOVEDIR) != 0) {
+               error = rmdirat_internal(vfs_context_current(), uap->fd,
+                   uap->path, UIO_USERSPACE);
+       } else {
+               error = unlinkat_internal(vfs_context_current(), uap->fd,
+                   uap->path, UIO_USERSPACE);
+       }
+
+       return (error);
+}
+
+/*
+ * Delete a name from the filesystem using Carbon semantics.
+ *
+ * Same lookup as unlink, but the removal is requested with
+ * VNODE_REMOVE_NODELETEBUSY.
+ */
+int
+delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
+{
+       vfs_context_t ctx = vfs_context_current();
+       struct nameidata nd;
+       int error;
+
+       NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
+              uap->path, ctx);
+
+       error = unlink1(ctx, &nd, VNODE_REMOVE_NODELETEBUSY);
+       return (error);
+}
+
+/*
+ * Reposition read/write file offset.
+ *
+ * Parameters: p                       Calling process
+ *             uap->fd                 Descriptor to reposition
+ *             uap->offset             Offset, interpreted per uap->whence
+ *             uap->whence             L_SET, L_INCR or L_XTND
+ *             retval                  Receives the resulting offset on success
+ *
+ * Returns:    0                       Success
+ *             ESPIPE                  fd is a fifo, or fp_getfvp returned
+ *                                     ENOTSUP
+ *             EINVAL                  Bad whence, or a negative result on
+ *                                     anything but a character device
+ *             EOVERFLOW               Positive offset produced a negative result
+ *             !0                      errno value from fp_getfvp, the MAC
+ *                                     check, vnode_getwithref or vnode_size
+ */
+int
+lseek(proc_t p, struct lseek_args *uap, off_t *retval)
+{
+       struct fileproc *fp;
+       vnode_t vp;
+       struct vfs_context *ctx;
+       off_t offset = uap->offset, file_size;
+       int error;
+
+       if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
+               /* ENOTSUP from fp_getfvp is reported to the caller as ESPIPE */
+               if (error == ENOTSUP)
+                       return (ESPIPE);
+               return (error);
+       }
+       /* from here on every exit has to file_drop(uap->fd) */
+       if (vnode_isfifo(vp)) {
+               file_drop(uap->fd);
+               return(ESPIPE);
+       }
+
+
+       ctx = vfs_context_current();
+#if CONFIG_MACF
+       /* a zero-length relative seek only reads the offset */
+       if (uap->whence == L_INCR && uap->offset == 0)
+               error = mac_file_check_get_offset(vfs_context_ucred(ctx),
+                   fp->f_fglob);
+       else
+               error = mac_file_check_change_offset(vfs_context_ucred(ctx),
+                   fp->f_fglob);
+       if (error) {
+               file_drop(uap->fd);
+               return (error);
+       }
+#endif
+       if ( (error = vnode_getwithref(vp)) ) {
+               file_drop(uap->fd);
+               return(error);
+       }
+
+       /* turn the request into an absolute offset */
+       switch (uap->whence) {
+       case L_INCR:
+               offset += fp->f_fglob->fg_offset;
+               break;
+       case L_XTND:
+               if ((error = vnode_size(vp, &file_size, ctx)) != 0)
+                       break;
+               offset += file_size;
+               break;
+       case L_SET:
+               break;
+       default:
+               error = EINVAL;
+       }
+       if (error == 0) {
+               /*
+                * NOTE(review): this test runs after the addition above, so
+                * it depends on the sum wrapping negative on overflow --
+                * confirm the build guarantees that for signed off_t.
+                */
+               if (uap->offset > 0 && offset < 0) {
+                       /* Incremented/relative move past max size */
+                       error = EOVERFLOW;
+               } else {
+                       /*
+                        * Allow negative offsets on character devices, per
+                        * POSIX 1003.1-2001.  Most likely for writing disk
+                        * labels.
+                        */
+                       if (offset < 0 && vp->v_type != VCHR) {
+                               /* Decremented/relative move before start */
+                               error = EINVAL;
+                       } else {
+                               /* Success */
+                               fp->f_fglob->fg_offset = offset;
+                               *retval = fp->f_fglob->fg_offset;
+                       }
+               }
+       }
+
+       /* 
+        * An lseek can affect whether data is "available to read."  Use
+        * hint of NOTE_NONE so no EVFILT_VNODE events fire
+        */
+       post_event_if_success(vp, error, NOTE_NONE);
+       (void)vnode_put(vp);
+       file_drop(uap->fd);
+       return (error);
+}
+
+
+/*
+ * Check access permissions.
+ *
+ * Translates the access(2)-style bits in 'uflags' into a kauth action for
+ * 'vp' and has vnode_authorize() evaluate it.  If the resulting action is
+ * empty, the call is an existence check only and succeeds without calling
+ * vnode_authorize().
+ *
+ * Returns:    0                       Success
+ *             vnode_authorize:???
+ */
+static int
+access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
+{
+       kauth_action_t wanted;
+       int error;
+
+       if (uflags & _ACCESS_EXTENDED_MASK) {
+               /* extended request: the kauth bits sit above the low byte */
+               wanted = uflags >> 8;
+       } else {
+               /* plain R_OK/W_OK/X_OK: map each onto what vnode_authorize understands */
+               wanted = 0;
+               if (uflags & R_OK) {
+                       /* aka KAUTH_VNODE_LIST_DIRECTORY */
+                       wanted |= KAUTH_VNODE_READ_DATA;
+               }
+               if (uflags & W_OK) {
+                       /* might want delete rights for directories here too */
+                       wanted |= vnode_isdir(vp) ?
+                           (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY) :
+                           KAUTH_VNODE_WRITE_DATA;
+               }
+               if (uflags & X_OK) {
+                       wanted |= vnode_isdir(vp) ?
+                           KAUTH_VNODE_SEARCH : KAUTH_VNODE_EXECUTE;
+               }
+       }
+
+#if CONFIG_MACF
+       error = mac_vnode_check_access(ctx, vp, uflags);
+       if (error)
+               return (error);
+#endif /* MAC */
+
+       /* an empty action means only check for existence */
+       error = (wanted != 0) ?
+           vnode_authorize(vp, dvp, wanted | KAUTH_VNODE_ACCESS, ctx) : 0;
+
+       return (error);
+}
+
+
+
+/*
+ * access_extended: Check access permissions in bulk.
+ *
+ * Description:        uap->entries            Pointer to an array of accessx
+ *                                     descriptor structs, plus one or 
+ *                                     more NULL terminated strings (see 
+ *                                     "Notes" section below).
+ *             uap->size               Size of the area pointed to by
+ *                                     uap->entries.
+ *             uap->results            Pointer to the results array.
+ *
+ * Returns:    0                       Success
+ *             ENOMEM                  Insufficient memory
+ *             EINVAL                  Invalid arguments
+ *             namei:EFAULT            Bad address
+ *             namei:ENAMETOOLONG      Filename too long
+ *             namei:ENOENT            No such file or directory
+ *             namei:ELOOP             Too many levels of symbolic links
+ *             namei:EBADF             Bad file descriptor
+ *             namei:ENOTDIR           Not a directory
+ *             namei:???
+ *             access1:
+ *
+ * Implicit returns:
+ *             uap->results            Array contents modified
+ *
+ * Notes:      The uap->entries are structured as an arbitrary length array
+ *             of accessx descriptors, followed by one or more NULL terminated
+ *             strings
+ *
+ *                     struct accessx_descriptor[0]
+ *                     ...
+ *                     struct accessx_descriptor[n]
+ *                     char name_data[0];
+ *
+ *             We determine the entry count by walking the buffer containing
+ *             the uap->entries argument descriptor.  For each descriptor we
+ *             see, the valid values for the offset ad_name_offset will be
+ *             in the byte range:
+ *
+ *                     [ uap->entries + sizeof(struct accessx_descriptor) ]
+ *                                             to
+ *                             [ uap->entries + uap->size - 2 ]
+ *
+ *             since we must have at least one string, and the string must
+ *             be at least one character plus the NULL terminator in length.
+ *             
+ * XXX:                Need to support the check-as uid argument
+ */
+int
+access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
+{
+       struct accessx_descriptor *input = NULL;
+       errno_t *result = NULL;
+       errno_t error = 0;
+       int wantdelete = 0;
+       unsigned int desc_max, desc_actual, i, j;
+       struct vfs_context context;
+       struct nameidata nd;
+       int niopts;
+       vnode_t vp = NULL;
+       vnode_t dvp = NULL;
+#define ACCESSX_MAX_DESCR_ON_STACK 10
+       struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
+
+       context.vc_ucred = NULL;
  
         /*
          * Validate parameters; if valid, copy the descriptor array and string
  
         /*
          * Validate parameters; if valid, copy the descriptor array and string
@@ -3714,7 +5063,9 @@ access_extended(__unused proc_t p, struct access_extended_args *uap, __unused in
                                 niopts |= WANTPARENT;
  
                         /* do the lookup */
                                 niopts |= WANTPARENT;
  
                         /* do the lookup */
-                       NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset), &context);
+                       NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
+                              CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
+                              &context);
                         error = namei(&nd);
                         if (!error) {
                                 vp = nd.ni_vp;
                         error = namei(&nd);
                         if (!error) {
                                 vp = nd.ni_vp;
@@ -3776,8 +5127,9 @@ out:
   *             namei:???
   *             access1:
   */
   *             namei:???
   *             access1:
   */
-int
-access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
+static int
+faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
+    int flag, enum uio_seg segflg)
  {
         int error;
         struct nameidata nd;
  {
         int error;
         struct nameidata nd;
@@ -3788,26 +5140,31 @@ access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
  #endif
  
         /*
  #endif
  
         /*
-        * Access is defined as checking against the process'
-        * real identity, even if operations are checking the
-        * effective identity.  So we need to tweak the credential
-        * in the context.
+        * Unless the AT_EACCESS option is used, Access is defined as checking
+        * against the process' real identity, even if operations are checking
+        * the effective identity.  So we need to tweak the credential
+        * in the context for that case.
          */
          */
-       context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
-       context.vc_thread = current_thread();
+       if (!(flag & AT_EACCESS))
+               context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
+       else
+               context.vc_ucred = ctx->vc_ucred;
+       context.vc_thread = ctx->vc_thread;
+
  
         niopts = FOLLOW | AUDITVNPATH1;
         /* need parent for vnode_authorize for deletion test */
  
         niopts = FOLLOW | AUDITVNPATH1;
         /* need parent for vnode_authorize for deletion test */
-       if (uap->flags & _DELETE_OK)
+       if (amode & _DELETE_OK)
                 niopts |= WANTPARENT;
                 niopts |= WANTPARENT;
-       NDINIT(&nd, LOOKUP, niopts, UIO_USERSPACE, uap->path, &context);
+       NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
+              path, &context);
  
  #if NAMEDRSRCFORK
         /* access(F_OK) calls are allowed for resource forks. */
  
  #if NAMEDRSRCFORK
         /* access(F_OK) calls are allowed for resource forks. */
-       if (uap->flags == F_OK)
+       if (amode == F_OK)
                 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
  #endif
                 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
  #endif
-       error = namei(&nd);
+       error = nameiat(&nd, fd);
         if (error)
                 goto out;
  
         if (error)
                 goto out;
  
@@ -3824,7 +5181,7 @@ access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
         }
  #endif
  
         }
  #endif
  
-       error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
+       error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
  
  #if NAMEDRSRCFORK
         if (is_namedstream) {
  
  #if NAMEDRSRCFORK
         if (is_namedstream) {
@@ -3833,15 +5190,33 @@ access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
  #endif
  
         vnode_put(nd.ni_vp);
  #endif
  
         vnode_put(nd.ni_vp);
-       if (uap->flags & _DELETE_OK)
+       if (amode & _DELETE_OK)
                 vnode_put(nd.ni_dvp);
         nameidone(&nd);
    
  out:
                 vnode_put(nd.ni_dvp);
         nameidone(&nd);
    
  out:
-       kauth_cred_unref(&context.vc_ucred);
-       return(error);
+       if (!(flag & AT_EACCESS))
+               kauth_cred_unref(&context.vc_ucred);
+       return (error);
+}
+
+/*
+ * access: system call entry point.
+ *
+ * Hands the request to faccessat_internal() using AT_FDCWD as the
+ * directory descriptor, a zero flag word, and a user-space path.
+ * The value returned by faccessat_internal() is passed back unchanged.
+ */
+int
+access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
+{
+       vfs_context_t ctx = vfs_context_current();
+       const int no_flags = 0;
+
+       return (faccessat_internal(ctx, AT_FDCWD, uap->path, uap->flags,
+           no_flags, UIO_USERSPACE));
  }
  
  }
  
+/*
+ * faccessat: system call entry point.
+ *
+ * AT_EACCESS is the only bit accepted in uap->flag; if any other bit is
+ * set the call fails with EINVAL before anything else is done.  Otherwise
+ * the request is forwarded to faccessat_internal() with a user-space path
+ * and its result is returned unchanged.
+ */
+int
+faccessat(__unused proc_t p, struct faccessat_args *uap,
+          __unused int32_t *retval)
+{
+       vfs_context_t ctx;
+
+       /* Refuse flag bits other than AT_EACCESS. */
+       if ((uap->flag & ~AT_EACCESS) != 0) {
+               return (EINVAL);
+       }
+
+       ctx = vfs_context_current();
+       return (faccessat_internal(ctx, uap->fd, uap->path, uap->amode,
+           uap->flag, UIO_USERSPACE));
+}
  
  /*
   * Returns:    0                       Success
  
  /*
   * Returns:    0                       Success
@@ -3851,8 +5226,12 @@ out:
   *     vn_stat:???
   */
  static int
   *     vn_stat:???
   */
  static int
-stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
+fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
+    user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
+    enum uio_seg segflg, int fd, int flag)
  {
  {
+       struct nameidata nd;
+       int follow;
         union {
                 struct stat sb;
                 struct stat64 sb64;
         union {
                 struct stat sb;
                 struct stat64 sb64;
@@ -3869,12 +5248,16 @@ stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsec
         size_t xsecurity_bufsize;
         void * statptr;
  
         size_t xsecurity_bufsize;
         void * statptr;
  
+       follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
+       NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
+           segflg, path, ctx);
+
  #if NAMEDRSRCFORK
         int is_namedstream = 0;
         /* stat calls are allowed for resource forks. */
  #if NAMEDRSRCFORK
         int is_namedstream = 0;
         /* stat calls are allowed for resource forks. */
-       ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
+       nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
  #endif
  #endif
-       error = namei(ndp);
+       error = nameiat(&nd, fd);
         if (error)
                 return (error);
         fsec = KAUTH_FILESEC_NONE;
         if (error)
                 return (error);
         fsec = KAUTH_FILESEC_NONE;
@@ -3886,23 +5269,23 @@ stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsec
          * force an inactive on release which will mark it 
          * for recycle.
          */
          * force an inactive on release which will mark it 
          * for recycle.
          */
-       if (vnode_isnamedstream(ndp->ni_vp) &&
-           (ndp->ni_vp->v_parent != NULLVP) &&
-           vnode_isshadow(ndp->ni_vp)) {
+       if (vnode_isnamedstream(nd.ni_vp) &&
+           (nd.ni_vp->v_parent != NULLVP) &&
+           vnode_isshadow(nd.ni_vp)) {
                 is_namedstream = 1;
                 is_namedstream = 1;
-               vnode_ref(ndp->ni_vp);
+               vnode_ref(nd.ni_vp);
         }
  #endif
  
         }
  #endif
  
-       error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
+       error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
  
  #if NAMEDRSRCFORK
         if (is_namedstream) {
  
  #if NAMEDRSRCFORK
         if (is_namedstream) {
-               vnode_rele(ndp->ni_vp);
+               vnode_rele(nd.ni_vp);
         }
  #endif
         }
  #endif
-       vnode_put(ndp->ni_vp);
-       nameidone(ndp);
+       vnode_put(nd.ni_vp);
+       nameidone(&nd);
  
         if (error)
                 return (error);
  
         if (error)
                 return (error);
@@ -3980,23 +5363,6 @@ out:
         return (error);
  }
  
         return (error);
  }
  
-/*
- * Get file status; this version follows links.
- *
- * Returns:    0                       Success
- *     stat2:???                       [see stat2() in this file]
- */
-static int
-stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
-{
-       struct nameidata nd;
-       vfs_context_t ctx = vfs_context_current();
-
-       NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
-           UIO_USERSPACE, path, ctx);
-       return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
-}
-
  /*
   * stat_extended: Get file status; with extended security (ACL).
   *
  /*
   * stat_extended: Get file status; with extended security (ACL).
   *
@@ -4014,25 +5380,30 @@ stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecu
   *
   */
  int
   *
   */
  int
-stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval)
+stat_extended(__unused proc_t p, struct stat_extended_args *uap,
+    __unused int32_t *retval)
  {
  {
-       return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
+       return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
+           uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
+           0));
  }
  
  /*
   * Returns:    0                       Success
  }
  
  /*
   * Returns:    0                       Success
- *     stat1:???                       [see stat1() in this file]
+ *     fstatat_internal:???            [see fstatat_internal() in this file]
   */
  int
  stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
  {
   */
  int
  stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
  {
-       return(stat1(uap->path, uap->ub, 0, 0, 0));
+       return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
+           0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
  }
  
  int
  stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
  {
  }
  
  int
  stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
  {
-       return(stat1(uap->path, uap->ub, 0, 0, 1));
+       return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
+           0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
  }
  
  /*
  }
  
  /*
@@ -4054,21 +5425,9 @@ stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
  int
  stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
  {
  int
  stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
  {
-       return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
-}
-/*
- * Get file status; this version does not follow links.
- */
-static int
-lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
-{
-       struct nameidata nd;
-       vfs_context_t ctx = vfs_context_current();
-
-       NDINIT(&nd, LOOKUP, NOTRIGGER | NOFOLLOW | AUDITVNPATH1, 
-           UIO_USERSPACE, path, ctx);
-
-       return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
+       return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
+           uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
+           0));
  }
  
  /*
  }
  
  /*
@@ -4090,19 +5449,26 @@ lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsec
  int
  lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
  {
  int
  lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
  {
-       return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
+       return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
+           uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
+           AT_SYMLINK_NOFOLLOW));
  }
  
  }
  
+/*
+ * Get file status; this version does not follow links.
+ */
  int
  lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
  {
  int
  lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
  {
-       return(lstat1(uap->path, uap->ub, 0, 0, 0));
+       return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
+           0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
  }
  
  int
  lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
  {
  }
  
  int
  lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
  {
-       return(lstat1(uap->path, uap->ub, 0, 0, 1));
+       return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
+           0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
  }
  
  /*
  }
  
  /*
@@ -4125,7 +5491,30 @@ lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
  int
  lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
  {
  int
  lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
  {
-       return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
+       return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
+           uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
+           AT_SYMLINK_NOFOLLOW));
+}
+
+/*
+ * fstatat: system call entry point.
+ *
+ * AT_SYMLINK_NOFOLLOW is the only bit accepted in uap->flag; any other
+ * bit yields EINVAL.  Otherwise the request is forwarded to
+ * fstatat_internal() with zero xsecurity / xsecurity_size arguments and
+ * isstat64 == 0, and its result is returned unchanged.
+ */
+int
+fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
+{
+       const int isstat64 = 0;
+       vfs_context_t ctx;
+
+       /* Refuse flag bits other than AT_SYMLINK_NOFOLLOW. */
+       if ((uap->flag & ~AT_SYMLINK_NOFOLLOW) != 0) {
+               return (EINVAL);
+       }
+
+       ctx = vfs_context_current();
+       return (fstatat_internal(ctx, uap->path, uap->ub, 0, 0, isstat64,
+           UIO_USERSPACE, uap->fd, uap->flag));
+}
+
+/*
+ * fstatat64: system call entry point.
+ *
+ * Same flag validation as the non-64 variant: AT_SYMLINK_NOFOLLOW is the
+ * only bit accepted in uap->flag, anything else yields EINVAL.  The
+ * request is then forwarded to fstatat_internal() with zero xsecurity /
+ * xsecurity_size arguments and isstat64 == 1.
+ */
+int
+fstatat64(__unused proc_t p, struct fstatat64_args *uap,
+    __unused int32_t *retval)
+{
+       const int isstat64 = 1;
+       vfs_context_t ctx;
+
+       /* Refuse flag bits other than AT_SYMLINK_NOFOLLOW. */
+       if ((uap->flag & ~AT_SYMLINK_NOFOLLOW) != 0) {
+               return (EINVAL);
+       }
+
+       ctx = vfs_context_current();
+       return (fstatat_internal(ctx, uap->path, uap->ub, 0, 0, isstat64,
+           UIO_USERSPACE, uap->fd, uap->flag));
  }
  
  /*
  }
  
  /*
@@ -4151,7 +5540,7 @@ pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
         struct nameidata nd;
         vfs_context_t ctx = vfs_context_current();
  
         struct nameidata nd;
         vfs_context_t ctx = vfs_context_current();
  
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+       NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1, 
                 UIO_USERSPACE, uap->path, ctx);
         error = namei(&nd);
         if (error)
                 UIO_USERSPACE, uap->path, ctx);
         error = namei(&nd);
         if (error)
@@ -4168,48 +5557,69 @@ pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
   * Return target name of a symbolic link.
   */
  /* ARGSUSED */
   * Return target name of a symbolic link.
   */
  /* ARGSUSED */
-int
-readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
+static int
+readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
+    enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
+    int *retval)
  {
         vnode_t vp;
         uio_t auio;
  {
         vnode_t vp;
         uio_t auio;
-       int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
         int error;
         struct nameidata nd;
         int error;
         struct nameidata nd;
-       vfs_context_t ctx = vfs_context_current();
         char uio_buf[ UIO_SIZEOF(1) ];
  
         char uio_buf[ UIO_SIZEOF(1) ];
  
-       NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctx);
-       error = namei(&nd);
+       NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
+           seg, path, ctx);
+
+       error = nameiat(&nd, fd);
         if (error)
                 return (error);
         vp = nd.ni_vp;
  
         nameidone(&nd);
  
         if (error)
                 return (error);
         vp = nd.ni_vp;
  
         nameidone(&nd);
  
-       auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, 
-                                                                 &uio_buf[0], sizeof(uio_buf));
-       uio_addiov(auio, uap->buf, uap->count);
-       if (vp->v_type != VLNK)
+       auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
+                                    &uio_buf[0], sizeof(uio_buf));
+       uio_addiov(auio, buf, bufsize);
+       if (vp->v_type != VLNK) {
                 error = EINVAL;
                 error = EINVAL;
-       else {
+       } else {
  #if CONFIG_MACF
  #if CONFIG_MACF
-               error = mac_vnode_check_readlink(ctx,
-                   vp);
+               error = mac_vnode_check_readlink(ctx, vp);
  #endif
                 if (error == 0)
  #endif
                 if (error == 0)
-                       error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
+                       error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
+                                               ctx);
                 if (error == 0)
                         error = VNOP_READLINK(vp, auio, ctx);
         }
         vnode_put(vp);
  
                 if (error == 0)
                         error = VNOP_READLINK(vp, auio, ctx);
         }
         vnode_put(vp);
  
-       /* Safe: uio_resid() is bounded above by "count", and "count" is an int  */
-       *retval = uap->count - (int)uio_resid(auio);
+       *retval = bufsize - (int)uio_resid(auio);
         return (error);
  }
  
         return (error);
  }
  
+/*
+ * readlink: system call entry point.
+ *
+ * Forwards to readlinkat_internal() with AT_FDCWD as the directory
+ * descriptor.  The uio segment type is picked from the calling process
+ * (64-bit vs. 32-bit) and is used for both the path and the result
+ * buffer.
+ */
+int
+readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
+{
+       enum uio_seg addrspace = UIO_USERSPACE32;
+       user_addr_t linkpath = CAST_USER_ADDR_T(uap->path);
+       user_addr_t outbuf = CAST_USER_ADDR_T(uap->buf);
+
+       if (IS_64BIT_PROCESS(p)) {
+               addrspace = UIO_USERSPACE64;
+       }
+
+       return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
+           linkpath, addrspace, outbuf, uap->count, addrspace, retval));
+}
+
+/*
+ * readlinkat: system call entry point.
+ *
+ * Forwards to readlinkat_internal() with the caller-supplied directory
+ * descriptor uap->fd.  The uio segment type is picked from the calling
+ * process (64-bit vs. 32-bit) and is used for both the path and the
+ * result buffer.
+ */
+int
+readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
+{
+       enum uio_seg addrspace;
+
+       if (IS_64BIT_PROCESS(p)) {
+               addrspace = UIO_USERSPACE64;
+       } else {
+               addrspace = UIO_USERSPACE32;
+       }
+
+       return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
+           addrspace, uap->buf, uap->bufsize, addrspace, retval));
+}
+
  /*
   * Change file flags.
   */
  /*
   * Change file flags.
   */
@@ -4262,7 +5672,7 @@ chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
         struct nameidata nd;
  
         AUDIT_ARG(fflags, uap->flags);
         struct nameidata nd;
  
         AUDIT_ARG(fflags, uap->flags);
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+       NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1, 
                 UIO_USERSPACE, uap->path, ctx);
         error = namei(&nd);
         if (error)
                 UIO_USERSPACE, uap->path, ctx);
         error = namei(&nd);
         if (error)
@@ -4316,7 +5726,7 @@ fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
   *             translated to EPERM before being returned.
   */
  static int
   *             translated to EPERM before being returned.
   */
  static int
-chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
+chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
  {
         kauth_action_t action;
         int error;
  {
         kauth_action_t action;
         int error;
@@ -4332,8 +5742,8 @@ chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
  #endif
  
  #if CONFIG_MACF
  #endif
  
  #if CONFIG_MACF
-       error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode);
-       if (error)
+       if (VATTR_IS_ACTIVE(vap, va_mode) &&
+           (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
                 return (error);
  #endif
  
                 return (error);
  #endif
  
@@ -4356,19 +5766,21 @@ chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
   *
   * Returns:    0                       Success
   *             namei:???               [anything namei can return]
   *
   * Returns:    0                       Success
   *             namei:???               [anything namei can return]
- *             chmod2:???              [anything chmod2 can return]
+ *             chmod_vnode:???         [anything chmod_vnode can return]
   */
  static int
   */
  static int
-chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
+chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
+    int fd, int flag, enum uio_seg segflg)
  {
         struct nameidata nd;
  {
         struct nameidata nd;
-       int error;
+       int follow, error;
  
  
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
-               UIO_USERSPACE, path, ctx);
-       if ((error = namei(&nd)))
+       follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
+       NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
+           segflg, path, ctx);
+       if ((error = nameiat(&nd, fd)))
                 return (error);
                 return (error);
-       error = chmod2(ctx, nd.ni_vp, vap);
+       error = chmod_vnode(ctx, nd.ni_vp, vap);
         vnode_put(nd.ni_vp);
         nameidone(&nd);
         return(error);
         vnode_put(nd.ni_vp);
         nameidone(&nd);
         return(error);
@@ -4429,7 +5841,8 @@ chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int3
                 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
         }
  
                 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
         }
  
-       error = chmod1(vfs_context_current(), uap->path, &va);
+       error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
+           UIO_USERSPACE);
  
         if (xsecdst != NULL)
                 kauth_filesec_free(xsecdst);
  
         if (xsecdst != NULL)
                 kauth_filesec_free(xsecdst);
@@ -4438,17 +5851,35 @@ chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int3
  
  /*
   * Returns:    0                       Success
  
  /*
   * Returns:    0                       Success
- *             chmod1:???              [anything chmod1 can return]
+ *             chmodat:???             [anything chmodat can return]
   */
   */
-int
-chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
+static int
+fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
+    int flag, enum uio_seg segflg)
  {
         struct vnode_attr va;
  
         VATTR_INIT(&va);
  {
         struct vnode_attr va;
  
         VATTR_INIT(&va);
-       VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
+       VATTR_SET(&va, va_mode, mode & ALLPERMS);
+
+       return (chmodat(ctx, path, &va, fd, flag, segflg));
+}
+
+/*
+ * chmod: system call entry point.
+ *
+ * Forwards to fchmodat_internal() with AT_FDCWD as the directory
+ * descriptor, a zero flag word, and a user-space path; the result is
+ * returned unchanged.
+ */
+int
+chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
+{
+       vfs_context_t ctx = vfs_context_current();
+       const int no_flags = 0;
+
+       return (fchmodat_internal(ctx, uap->path, uap->mode, AT_FDCWD,
+           no_flags, UIO_USERSPACE));
+}
+
+int
+fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
+{
+       if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
+               return (EINVAL);
  
  
-       return(chmod1(vfs_context_current(), uap->path, &va));
+       return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
+           uap->fd, uap->flag, UIO_USERSPACE));
  }
  
  /*
  }
  
  /*
@@ -4470,7 +5901,7 @@ fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
         }
         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
  
         }
         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
  
-       error = chmod2(vfs_context_current(), vp, vap);
+       error = chmod_vnode(vfs_context_current(), vp, vap);
         (void)vnode_put(vp);
         file_drop(fd);
  
         (void)vnode_put(vp);
         file_drop(fd);
  
@@ -4517,6 +5948,10 @@ fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *re
         case USER_ADDR_NULL:
                 VATTR_SET(&va, va_acl, NULL);
                 break;
         case USER_ADDR_NULL:
                 VATTR_SET(&va, va_acl, NULL);
                 break;
+       case CAST_USER_ADDR_T((void *)1):       /* _FILESEC_REMOVE_ACL */
+               VATTR_SET(&va, va_acl, NULL);
+               break;
+               /* not being set */
         case CAST_USER_ADDR_T(-1):
                 break;
         default:
         case CAST_USER_ADDR_T(-1):
                 break;
         default:
@@ -4556,19 +5991,22 @@ fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
   */
  /* ARGSUSED */
  static int
   */
  /* ARGSUSED */
  static int
-chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow)
+fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
+   gid_t gid, int flag, enum uio_seg segflg)
  {
         vnode_t vp;
         struct vnode_attr va;
         int error;
         struct nameidata nd;
  {
         vnode_t vp;
         struct vnode_attr va;
         int error;
         struct nameidata nd;
+       int follow;
         kauth_action_t action;
  
         kauth_action_t action;
  
-       AUDIT_ARG(owner, uap->uid, uap->gid);
+       AUDIT_ARG(owner, uid, gid);
  
  
-       NDINIT(&nd, LOOKUP, (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctx);
-       error = namei(&nd);
+       follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
+       NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
+           path, ctx);
+       error = nameiat(&nd, fd);
         if (error)
                 return (error);
         vp = nd.ni_vp;
         if (error)
                 return (error);
         vp = nd.ni_vp;
@@ -4576,13 +6014,13 @@ chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int
         nameidone(&nd);
  
         VATTR_INIT(&va);
         nameidone(&nd);
  
         VATTR_INIT(&va);
-       if (uap->uid != VNOVAL)
-               VATTR_SET(&va, va_uid, uap->uid);
-       if (uap->gid != VNOVAL)
-               VATTR_SET(&va, va_gid, uap->gid);
+       if (uid != (uid_t)VNOVAL)
+               VATTR_SET(&va, va_uid, uid);
+       if (gid != (gid_t)VNOVAL)
+               VATTR_SET(&va, va_gid, gid);
  
  #if CONFIG_MACF
  
  #if CONFIG_MACF
-       error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
+       error = mac_vnode_check_setowner(ctx, vp, uid, gid);
         if (error)
                 goto out;
  #endif
         if (error)
                 goto out;
  #endif
@@ -4601,22 +6039,33 @@ out:
          */
         if (error == EACCES)
                 error = EPERM;
          */
         if (error == EACCES)
                 error = EPERM;
-       
+
         vnode_put(vp);
         return (error);
  }
  
  int
         vnode_put(vp);
         return (error);
  }
  
  int
-chown(__unused proc_t p, struct chown_args *uap, int32_t *retval)
+chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
  {
  {
-       return chown1(vfs_context_current(), uap, retval, 1);
+       return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
+           uap->uid, uap->gid, 0, UIO_USERSPACE));
  }
  
  int
  }
  
  int
-lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval)
+lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
  {
  {
-       /* Argument list identical, but machine generated; cast for chown1() */
-       return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
+       return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
+           uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
+}
+
+/*
+ * fchownat: system call entry point.
+ *
+ * AT_SYMLINK_NOFOLLOW is the only bit accepted in uap->flag; any other
+ * bit yields EINVAL.  Otherwise the request is forwarded to
+ * fchownat_internal() with a user-space path and its result is returned
+ * unchanged.
+ */
+int
+fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
+{
+       vfs_context_t ctx;
+
+       /* Refuse flag bits other than AT_SYMLINK_NOFOLLOW. */
+       if ((uap->flag & ~AT_SYMLINK_NOFOLLOW) != 0) {
+               return (EINVAL);
+       }
+
+       ctx = vfs_context_current();
+       return (fchownat_internal(ctx, uap->fd, uap->path, uap->uid,
+           uap->gid, uap->flag, UIO_USERSPACE));
  }
  
  /*
  }
  
  /*
@@ -4775,7 +6224,7 @@ utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
          * AUDIT: Needed to change the order of operations to do the 
          * name lookup first because auditing wants the path.
          */
          * AUDIT: Needed to change the order of operations to do the 
          * name lookup first because auditing wants the path.
          */
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+       NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1, 
                 UIO_USERSPACE, uap->path, ctx);
         error = namei(&nd);
         if (error)
                 UIO_USERSPACE, uap->path, ctx);
         error = namei(&nd);
         if (error)
@@ -4842,7 +6291,7 @@ truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
  
         if (uap->length < 0)
                 return(EINVAL);
  
         if (uap->length < 0)
                 return(EINVAL);
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+       NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1, 
                 UIO_USERSPACE, uap->path, ctx);
         if ((error = namei(&nd)))
                 return (error);
                 UIO_USERSPACE, uap->path, ctx);
         if ((error = namei(&nd)))
                 return (error);
@@ -4891,11 +6340,13 @@ ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
                 return(error);
         }
  
                 return(error);
         }
  
-       if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
+       switch (FILEGLOB_DTYPE(fp->f_fglob)) {
+       case DTYPE_PSXSHM:
                 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
                 goto out;
                 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
                 goto out;
-       }
-       if (fp->f_fglob->fg_type != DTYPE_VNODE)  {
+       case DTYPE_VNODE:
+               break;
+       default:
                 error = EINVAL;
                 goto out;
         }
                 error = EINVAL;
                 goto out;
         }
@@ -5054,14 +6505,15 @@ copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
                 return(EINVAL);
         }
  
                 return(EINVAL);
         }
  
-       NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1,
+       NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
                 UIO_USERSPACE, uap->from, ctx);
         if ((error = namei(&fromnd)))
                 return (error);
         fvp = fromnd.ni_vp;
  
                 UIO_USERSPACE, uap->from, ctx);
         if ((error = namei(&fromnd)))
                 return (error);
         fvp = fromnd.ni_vp;
  
-       NDINIT(&tond, CREATE,  LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
-           UIO_USERSPACE, uap->to, ctx);
+       NDINIT(&tond, CREATE, OP_LINK,
+              LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
+              UIO_USERSPACE, uap->to, ctx);
         if ((error = namei(&tond))) {
                 goto out1;
         }
         if ((error = namei(&tond))) {
                 goto out1;
         }
@@ -5123,91 +6575,123 @@ out1:
   * or both not be directories.  If target is a directory, it must be empty.
   */
  /* ARGSUSED */
   * or both not be directories.  If target is a directory, it must be empty.
   */
  /* ARGSUSED */
-int
-rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
+static int
+renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
+    int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
  {
         vnode_t tvp, tdvp;
         vnode_t fvp, fdvp;
  {
         vnode_t tvp, tdvp;
         vnode_t fvp, fdvp;
-       struct nameidata fromnd, tond;
-       vfs_context_t ctx = vfs_context_current();
+       struct nameidata *fromnd, *tond;
         int error;
         int do_retry;
         int mntrename;
         int need_event;
         int error;
         int do_retry;
         int mntrename;
         int need_event;
-       const char *oname;
+       const char *oname = NULL;
         char *from_name = NULL, *to_name = NULL;
         int from_len=0, to_len=0;
         int holding_mntlock;
         mount_t locked_mp = NULL;
         char *from_name = NULL, *to_name = NULL;
         int from_len=0, to_len=0;
         int holding_mntlock;
         mount_t locked_mp = NULL;
-       vnode_t oparent;
+       vnode_t oparent = NULLVP;
  #if CONFIG_FSE
         fse_info from_finfo, to_finfo;
  #endif
         int from_truncated=0, to_truncated;
  #if CONFIG_FSE
         fse_info from_finfo, to_finfo;
  #endif
         int from_truncated=0, to_truncated;
-       
+       int batched = 0;
+       struct vnode_attr *fvap, *tvap;
+       int continuing = 0;
+       /* carving out a chunk for structs that are too big to be on stack. */
+       struct {
+               struct nameidata from_node, to_node;
+               struct vnode_attr fv_attr, tv_attr;
+       } * __rename_data;
+       MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
+       fromnd = &__rename_data->from_node;
+       tond = &__rename_data->to_node;
+
         holding_mntlock = 0;
         holding_mntlock = 0;
-    do_retry = 0;
+       do_retry = 0;
  retry:
         fvp = tvp = NULL;
         fdvp = tdvp = NULL;
  retry:
         fvp = tvp = NULL;
         fdvp = tdvp = NULL;
+       fvap = tvap = NULL;
         mntrename = FALSE;
  
         mntrename = FALSE;
  
-       NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx);
-       
-       if ( (error = namei(&fromnd)) )
-               goto out1;
-       fdvp = fromnd.ni_dvp;
-       fvp  = fromnd.ni_vp;
+       NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
+           segflg, from, ctx);
+       fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
  
  
-#if CONFIG_MACF
-       error = mac_vnode_check_rename_from(ctx, fdvp, fvp, &fromnd.ni_cnd);
-       if (error)
-               goto out1;
-#endif
+       NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
+           segflg, to, ctx);
+       tond->ni_flag = NAMEI_COMPOUNDRENAME;
  
  
-       NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK , UIO_USERSPACE, uap->to, ctx);
-       if (fvp->v_type == VDIR)
-               tond.ni_cnd.cn_flags |= WILLBEDIR;
+continue_lookup:
+       if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+               if ( (error = nameiat(fromnd, fromfd)) )
+                       goto out1;
+               fdvp = fromnd->ni_dvp;
+               fvp  = fromnd->ni_vp;
  
  
-       if ( (error = namei(&tond)) ) {
-               /*
-                * Translate error code for rename("dir1", "dir2/.").
-                */
-               if (error == EISDIR && fvp->v_type == VDIR) 
-                       error = EINVAL;
-               goto out1;
+               if (fvp && fvp->v_type == VDIR)
+                       tond->ni_cnd.cn_flags |= WILLBEDIR;
         }
         }
-       tdvp = tond.ni_dvp;
-       tvp  = tond.ni_vp;
-
-#if CONFIG_MACF
-       error = mac_vnode_check_rename_to(ctx,
-           tdvp, tvp, fdvp == tdvp, &tond.ni_cnd);
-       if (error)
-               goto out1;
-#endif
  
  
-       if (tvp != NULL) {
-               if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
-                       error = ENOTDIR;
+       if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+               if ( (error = nameiat(tond, tofd)) ) {
+                       /*
+                        * Translate error code for rename("dir1", "dir2/.").
+                        */
+                       if (error == EISDIR && fvp->v_type == VDIR)
+                               error = EINVAL;
                         goto out1;
                         goto out1;
-               } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
-                       error = EISDIR;
+               }
+               tdvp = tond->ni_dvp;
+               tvp  = tond->ni_vp;
+       }
+
+       batched = vnode_compound_rename_available(fdvp);
+       if (!fvp) {
+               /*
+                * Claim: this check will never reject a valid rename.
+                * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
+                * Suppose fdvp and tdvp are not on the same mount.
+                * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem.  If fvp is the root,
+                *      then you can't move it to within another dir on the same mountpoint.
+                * If fvp sits atop a vnode on the same mount as tdvp, then that vnode must also be part of the same mount as fdvp, which is a contradiction.
+                *
+                * If this check passes, then we are safe to pass these vnodes to the same FS.
+                */
+               if (fdvp->v_mount != tdvp->v_mount) {
+                       error = EXDEV;
                         goto out1;
                 }
                         goto out1;
                 }
+               goto skipped_lookup;
         }
         }
-       if (fvp == tdvp) {
-               error = EINVAL;
-               goto out1;
+
+       if (!batched) {
+               error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
+               if (error) {
+                       if (error == ENOENT) {
+                               /*
+                                * We encountered a race where after doing the namei, tvp stops
+                                * being valid. If so, simply re-drive the rename call from the
+                                * top.
+                                */
+                               do_retry = 1;
+                       }
+                       goto out1;
+               }
         }
         }
+
          /*
           * If the source and destination are the same (i.e. they're
           * links to the same vnode) and the target file system is
           * case sensitive, then there is nothing to do.
          /*
           * If the source and destination are the same (i.e. they're
           * links to the same vnode) and the target file system is
           * case sensitive, then there is nothing to do.
+        *
+        * XXX Come back to this.
           */
         if (fvp == tvp) {
                 int pathconf_val;
           */
         if (fvp == tvp) {
                 int pathconf_val;
-               
+
                 /*
                  * Note: if _PC_CASE_SENSITIVE selector isn't supported,
                  * then assume that this file system is case sensitive.
                 /*
                  * Note: if _PC_CASE_SENSITIVE selector isn't supported,
                  * then assume that this file system is case sensitive.
@@ -5215,96 +6699,18 @@ retry:
                 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
                     pathconf_val != 0) {
                         goto out1;
                 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
                     pathconf_val != 0) {
                         goto out1;
-               }       
-       }
-
-       /*
-        * Authorization.
-        *
-        * If tvp is a directory and not the same as fdvp, or tdvp is not
-        * the same as fdvp, the node is moving between directories and we
-        * need rights to remove from the old and add to the new.
-        *
-        * If tvp already exists and is not a directory, we need to be
-        * allowed to delete it.
-        *
-        * Note that we do not inherit when renaming.
-        *
-        * XXX This needs to be revisited to implement the deferred-inherit bit
-        */
-       {
-               int moving = 0;
-
-               error = 0;
-               if ((tvp != NULL) && vnode_isdir(tvp)) {
-                       if (tvp != fdvp)
-                               moving = 1;
-               } else if (tdvp != fdvp) {
-                       moving = 1;
                 }
                 }
-               /*
-                * must have delete rights to remove the old name even in
-                * the simple case of fdvp == tdvp.
-                *
-                * If fvp is a directory, and we are changing it's parent,
-                * then we also need rights to rewrite its ".." entry as well.
-                */
-               if (vnode_isdir(fvp)) {
-                       if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
-                               goto auth_exit;
-               } else {
-               if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
-                       goto auth_exit;
-               }
-               if (moving) {
-                       /* moving into tdvp or tvp, must have rights to add */
-                       if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
-                                NULL, 
-                                vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
-                                ctx)) != 0) {
-                /*
-                 * We could encounter a race where after doing the namei, tvp stops
-                 * being valid. If so, simply re-drive the rename call from the
-                 * top.
-                 */
-                 if (error == ENOENT) {
-                     do_retry = 1;
-                 }
-                               goto auth_exit;
-                       }
-               } else {
-                       /* node staying in same directory, must be allowed to add new name */
-                       if ((error = vnode_authorize(fdvp, NULL,
-                                vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
-                               goto auth_exit;
-               }
-               /* overwriting tvp */
-               if ((tvp != NULL) && !vnode_isdir(tvp) &&
-                   ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
-            /*
-             * We could encounter a race where after doing the namei, tvp stops
-             * being valid. If so, simply re-drive the rename call from the
-             * top.
-             */
-            if (error == ENOENT) {
-                do_retry = 1;
-            }
-                       goto auth_exit;
-               }
-                   
-               /* XXX more checks? */
-
-auth_exit:
-               /* authorization denied */
-               if (error != 0)
-                       goto out1;
         }
         }
+
         /*
          * Allow the renaming of mount points.
          * - target must not exist
          * - target must reside in the same directory as source
          * - union mounts cannot be renamed
          * - "/" cannot be renamed
         /*
          * Allow the renaming of mount points.
          * - target must not exist
          * - target must reside in the same directory as source
          * - union mounts cannot be renamed
          * - "/" cannot be renamed
+        *
+        * XXX Handle this in VFS after a continued lookup (if we missed
+        * in the cache to start off)
          */
         if ((fvp->v_flag & VROOT) &&
             (fvp->v_type == VDIR) &&
          */
         if ((fvp->v_flag & VROOT) &&
             (fvp->v_type == VDIR) &&
@@ -5314,7 +6720,7 @@ auth_exit:
             ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
             (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
                 vnode_t coveredvp;
             ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
             (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
                 vnode_t coveredvp;
-       
+
                 /* switch fvp to the covered vnode */
                 coveredvp = fvp->v_mount->mnt_vnodecovered;
                 if ( (vnode_getwithref(coveredvp)) ) {
                 /* switch fvp to the covered vnode */
                 coveredvp = fvp->v_mount->mnt_vnodecovered;
                 if ( (vnode_getwithref(coveredvp)) ) {
@@ -5334,35 +6740,6 @@ auth_exit:
                 error = EXDEV;
                 goto out1;
         }
                 error = EXDEV;
                 goto out1;
         }
-       /*
-        * Avoid renaming "." and "..".
-        */
-       if (fvp->v_type == VDIR &&
-           ((fdvp == fvp) ||
-            (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') ||
-            ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) {
-               error = EINVAL;
-               goto out1;
-       }
-       /*
-        * The following edge case is caught here:
-        * (to cannot be a descendent of from)
-        *
-        *       o fdvp
-        *      /
-        *     /
-        *    o fvp
-        *     \
-        *      \
-        *       o tdvp
-        *      /
-        *     /
-        *    o tvp
-        */
-       if (tdvp->v_parent == fvp) {
-               error = EINVAL;
-               goto out1;
-       }
  
         /*
          * If source is the same as the destination (that is the
  
         /*
          * If source is the same as the destination (that is the
@@ -5378,14 +6755,16 @@ auth_exit:
          * source.  NOTE: Then the target is unlocked going into vnop_rename,
          * so not to cause locking problems. There is a single reference on tvp.
          *
          * source.  NOTE: Then the target is unlocked going into vnop_rename,
          * so not to cause locking problems. There is a single reference on tvp.
          *
-        * NOTE - that fvp == tvp also occurs if they are hard linked and 
+        * NOTE - that fvp == tvp also occurs if they are hard linked and
          * that correct behaviour then is just to return success without doing
          * anything.
          * that correct behaviour then is just to return success without doing
          * anything.
+        *
+        * XXX filesystem should take care of this itself, perhaps...
          */
         if (fvp == tvp && fdvp == tdvp) {
          */
         if (fvp == tvp && fdvp == tdvp) {
-               if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
-                   !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
-                         fromnd.ni_cnd.cn_namelen)) {
+               if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
+                   !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
+                         fromnd->ni_cnd.cn_namelen)) {
                         goto out1;
                 }
         }
                         goto out1;
                 }
         }
@@ -5427,7 +6806,7 @@ auth_exit:
                          * nameidone has to happen before we vnode_put(tvp)
                          * since it may need to release the fs_nodelock on the tvp
                          */
                          * nameidone has to happen before we vnode_put(tvp)
                          * since it may need to release the fs_nodelock on the tvp
                          */
-                       nameidone(&tond);
+                       nameidone(tond);
  
                         if (tvp)
                                 vnode_put(tvp);
  
                         if (tvp)
                                 vnode_put(tvp);
@@ -5437,7 +6816,7 @@ auth_exit:
                          * nameidone has to happen before we vnode_put(fdvp)
                          * since it may need to release the fs_nodelock on the fvp
                          */
                          * nameidone has to happen before we vnode_put(fdvp)
                          * since it may need to release the fs_nodelock on the fvp
                          */
-                       nameidone(&fromnd);
+                       nameidone(fromnd);
  
                         vnode_put(fvp);
                         vnode_put(fdvp);
  
                         vnode_put(fvp);
                         vnode_put(fdvp);
@@ -5450,7 +6829,7 @@ auth_exit:
         } else {
                 /*
                  * when we dropped the iocounts to take
         } else {
                 /*
                  * when we dropped the iocounts to take
-                * the lock, we allowed the identity of 
+                * the lock, we allowed the identity of
                  * the various vnodes to change... if they did,
                  * we may no longer be dealing with a rename
                  * that reshapes the tree... once we're holding
                  * the various vnodes to change... if they did,
                  * we may no longer be dealing with a rename
                  * that reshapes the tree... once we're holding
@@ -5464,17 +6843,35 @@ auth_exit:
                         holding_mntlock = 0;
                 }
         }
                         holding_mntlock = 0;
                 }
         }
+
         // save these off so we can later verify that fvp is the same
         oname   = fvp->v_name;
         oparent = fvp->v_parent;
  
         // save these off so we can later verify that fvp is the same
         oname   = fvp->v_name;
         oparent = fvp->v_parent;
  
+skipped_lookup:
  #if CONFIG_FSE
  #if CONFIG_FSE
-       need_event = need_fsevent(FSE_RENAME, fvp);
-       if (need_event) { 
-               get_fse_info(fvp, &from_finfo, ctx);
+       need_event = need_fsevent(FSE_RENAME, fdvp);
+       if (need_event) {
+               if (fvp) {
+                       get_fse_info(fvp, &from_finfo, ctx);
+               } else {
+                       error = vfs_get_notify_attributes(&__rename_data->fv_attr);
+                       if (error) {
+                               goto out1;
+                       }
+
+                       fvap = &__rename_data->fv_attr;
+               }
  
                 if (tvp) {
                         get_fse_info(tvp, &to_finfo, ctx);
  
                 if (tvp) {
                         get_fse_info(tvp, &to_finfo, ctx);
+               } else if (batched) {
+                       error = vfs_get_notify_attributes(&__rename_data->tv_attr);
+                       if (error) {
+                               goto out1;
+                       }
+
+                       tvap = &__rename_data->tv_attr;
                 }
         }
  #else
                 }
         }
  #else
@@ -5482,26 +6879,36 @@ auth_exit:
  #endif /* CONFIG_FSE */
  
         if (need_event || kauth_authorize_fileop_has_listeners()) {
  #endif /* CONFIG_FSE */
  
         if (need_event || kauth_authorize_fileop_has_listeners()) {
-               GET_PATH(from_name);
                 if (from_name == NULL) {
                 if (from_name == NULL) {
-                       error = ENOMEM;
-                       goto out1;
+                       GET_PATH(from_name);
+                       if (from_name == NULL) {
+                               error = ENOMEM;
+                               goto out1;
+                       }
                 }
  
                 }
  
-               from_len = safe_getpath(fdvp, fromnd.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
+               from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
  
  
-               GET_PATH(to_name);
                 if (to_name == NULL) {
                 if (to_name == NULL) {
-                       error = ENOMEM;
-                       goto out1;
+                       GET_PATH(to_name);
+                       if (to_name == NULL) {
+                               error = ENOMEM;
+                               goto out1;
+                       }
                 }
  
                 }
  
-               to_len = safe_getpath(tdvp, tond.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
-       } 
-       
-       error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd,
-                           tdvp, tvp, &tond.ni_cnd,
-                           ctx);
+               to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
+       }
+#if CONFIG_SECLUDED_RENAME
+       if (flags & VFS_SECLUDE_RENAME) {
+               fromnd->ni_cnd.cn_flags |=  CN_SECLUDE_RENAME;
+       }
+#else
+       #pragma unused(flags)
+#endif
+       error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
+                           tdvp, &tvp, &tond->ni_cnd, tvap,
+                           0, ctx);
  
         if (holding_mntlock) {
                 /*
  
         if (holding_mntlock) {
                 /*
@@ -5513,25 +6920,38 @@ auth_exit:
                 holding_mntlock = 0;
         }
         if (error) {
                 holding_mntlock = 0;
         }
         if (error) {
-        /*
-         * We may encounter a race in the VNOP where the destination didn't 
-         * exist when we did the namei, but it does by the time we go and 
-         * try to create the entry. In this case, we should re-drive this rename
-         * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
-                * but other filesystems susceptible to this race could return it, too. 
-         */
-        if (error == ERECYCLE) {
-            do_retry = 1;
-        }
+               if (error == EKEEPLOOKING) {
+                       if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+                               if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+                                       panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
+                               }
+                       }
+
+                       fromnd->ni_vp = fvp;
+                       tond->ni_vp = tvp;
+
+                       goto continue_lookup;
+               }
+
+               /*
+                * We may encounter a race in the VNOP where the destination didn't
+                * exist when we did the namei, but it does by the time we go and
+                * try to create the entry. In this case, we should re-drive this rename
+                * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
+                * but other filesystems susceptible to this race could return it, too.
+                */
+               if (error == ERECYCLE) {
+                       do_retry = 1;
+               }
  
                 goto out1;
  
                 goto out1;
-       } 
-       
-       /* call out to allow 3rd party notification of rename. 
+       }
+
+       /* call out to allow 3rd party notification of rename.
          * Ignore result of kauth_authorize_fileop call.
          */
          * Ignore result of kauth_authorize_fileop call.
          */
-       kauth_authorize_fileop(vfs_context_ucred(ctx), 
-                       KAUTH_FILEOP_RENAME, 
+       kauth_authorize_fileop(vfs_context_ucred(ctx),
+                       KAUTH_FILEOP_RENAME,
                         (uintptr_t)from_name, (uintptr_t)to_name);
  
  #if CONFIG_FSE
                         (uintptr_t)from_name, (uintptr_t)to_name);
  
  #if CONFIG_FSE
@@ -5540,6 +6960,14 @@ auth_exit:
                         // set it here since only the from_finfo gets reported up to user space
                         from_finfo.mode |= FSE_TRUNCATED_PATH;
                 }
                         // set it here since only the from_finfo gets reported up to user space
                         from_finfo.mode |= FSE_TRUNCATED_PATH;
                 }
+
+               if (tvap && tvp) {
+                       vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
+               }
+               if (fvap) {
+                       vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
+               }
+
                 if (tvp) {
                         add_fsevent(FSE_RENAME, ctx,
                                     FSE_ARG_STRING, from_len, from_name,
                 if (tvp) {
                         add_fsevent(FSE_RENAME, ctx,
                                     FSE_ARG_STRING, from_len, from_name,
@@ -5556,7 +6984,7 @@ auth_exit:
                 }
         }
  #endif /* CONFIG_FSE */
                 }
         }
  #endif /* CONFIG_FSE */
-               
+
         /*
          * update filesystem's mount point data
          */
         /*
          * update filesystem's mount point data
          */
@@ -5575,7 +7003,10 @@ auth_exit:
                 }
                 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
  
                 }
                 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
  
-               error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
+               if (UIO_SEG_IS_USER_SPACE(segflg))
+                       error = copyinstr(to, tobuf, MAXPATHLEN, &len);
+               else
+                       error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
                 if (!error) {
                         /* find current mount point prefix */
                         pathend = &mp->mnt_vfsstat.f_mntonname[0];
                 if (!error) {
                         /* find current mount point prefix */
                         pathend = &mp->mnt_vfsstat.f_mntonname[0];
@@ -5598,12 +7029,14 @@ auth_exit:
                 vfs_unbusy(mp);
         }
         /*
                 vfs_unbusy(mp);
         }
         /*
-        * fix up name & parent pointers.  note that we first   
+        * fix up name & parent pointers.  note that we first
          * check that fvp has the same name/parent pointers it
          * had before the rename call... this is a 'weak' check
          * at best...
          * check that fvp has the same name/parent pointers it
          * had before the rename call... this is a 'weak' check
          * at best...
+        *
+        * XXX oparent and oname may not be set in the compound vnop case
          */
          */
-       if (oname == fvp->v_name && oparent == fvp->v_parent) {
+       if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
                 int update_flags;
  
                 update_flags = VNODE_UPDATE_NAME;
                 int update_flags;
  
                 update_flags = VNODE_UPDATE_NAME;
@@ -5611,7 +7044,7 @@ auth_exit:
                 if (fdvp != tdvp)
                         update_flags |= VNODE_UPDATE_PARENT;
  
                 if (fdvp != tdvp)
                         update_flags |= VNODE_UPDATE_PARENT;
  
-               vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
+               vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
         }
  out1:
         if (to_name != NULL) {
         }
  out1:
         if (to_name != NULL) {
@@ -5632,7 +7065,7 @@ out1:
                  * nameidone has to happen before we vnode_put(tdvp)
                  * since it may need to release the fs_nodelock on the tdvp
                  */
                  * nameidone has to happen before we vnode_put(tdvp)
                  * since it may need to release the fs_nodelock on the tdvp
                  */
-               nameidone(&tond);
+               nameidone(tond);
  
                 if (tvp)
                         vnode_put(tvp);
  
                 if (tvp)
                         vnode_put(tvp);
@@ -5643,25 +7076,51 @@ out1:
                  * nameidone has to happen before we vnode_put(fdvp)
                  * since it may need to release the fs_nodelock on the fdvp
                  */
                  * nameidone has to happen before we vnode_put(fdvp)
                  * since it may need to release the fs_nodelock on the fdvp
                  */
-               nameidone(&fromnd);
+               nameidone(fromnd);
  
                 if (fvp)
                         vnode_put(fvp);
                 vnode_put(fdvp);
         }
  
                 if (fvp)
                         vnode_put(fvp);
                 vnode_put(fdvp);
         }
-       
-    /*
-     * If things changed after we did the namei, then we will re-drive
-     * this rename call from the top.
-     */
-       if(do_retry) {
-        do_retry = 0;
+
+       /*
+        * If things changed after we did the namei, then we will re-drive
+        * this rename call from the top.
+        */
+       if (do_retry) {
+               do_retry = 0;
                 goto retry;
         }
                 goto retry;
         }
-       
+
+       FREE(__rename_data, M_TEMP);
         return (error);
  }
  
         return (error);
  }
  
+int
+rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
+{      /*
+        * rename entry point: a thin wrapper around renameat_internal().
+        * AT_FDCWD is passed as the directory fd for both the source path
+        * (uap->from) and the target path (uap->to), the path pointers are
+        * treated as user-space addresses (UIO_USERSPACE), and no rename
+        * flags are requested (0).  The result of renameat_internal() is
+        * returned unchanged; p and retval are unused.
+        */
+       return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
+           AT_FDCWD, uap->to, UIO_USERSPACE, 0));
+}
+
+#if CONFIG_SECLUDED_RENAME
+int rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval)
+{      /*
+        * Variant of rename that additionally forwards the caller-supplied
+        * uap->flags to renameat_internal(); only built when
+        * CONFIG_SECLUDED_RENAME is set.  Both paths use AT_FDCWD as the
+        * directory fd and are user-space addresses (UIO_USERSPACE).
+        * renameat_internal() tests the flags for VFS_SECLUDE_RENAME and,
+        * if present, sets CN_SECLUDE_RENAME on the source component name.
+        */
+       return renameat_internal(
+               vfs_context_current(), 
+               AT_FDCWD, uap->from,
+               AT_FDCWD, uap->to, 
+               UIO_USERSPACE, uap->flags);
+}
+#endif
+ 
+int
+renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
+{      /*
+        * renameat entry point: like rename, but the caller supplies the
+        * directory fds (uap->fromfd for uap->from, uap->tofd for uap->to)
+        * that renameat_internal() hands to nameiat() for each lookup.
+        * Paths are user-space addresses (UIO_USERSPACE) and no rename
+        * flags are requested (0).  The result of renameat_internal() is
+        * returned unchanged; p and retval are unused.
+        */
+       return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
+           uap->tofd, uap->to, UIO_USERSPACE, 0));
+}
+
  /*
   * Make a directory file.
   *
  /*
   * Make a directory file.
   *
@@ -5673,46 +7132,83 @@ out1:
   */
  /* ARGSUSED */
  static int
   */
  /* ARGSUSED */
  static int
-mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
+mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
+    enum uio_seg segflg)
  {
         vnode_t vp, dvp;
         int error;
         int update_flags = 0;
  {
         vnode_t vp, dvp;
         int error;
         int update_flags = 0;
+       int batched;
         struct nameidata nd;
  
         AUDIT_ARG(mode, vap->va_mode);
         struct nameidata nd;
  
         AUDIT_ARG(mode, vap->va_mode);
-       NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, 
-               UIO_USERSPACE, path, ctx);
+       NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
+              path, ctx);
         nd.ni_cnd.cn_flags |= WILLBEDIR;
         nd.ni_cnd.cn_flags |= WILLBEDIR;
-       error = namei(&nd);
+       nd.ni_flag = NAMEI_COMPOUNDMKDIR;
+
+continue_lookup:
+       error = nameiat(&nd, fd);
         if (error)
                 return (error);
         dvp = nd.ni_dvp;
         vp = nd.ni_vp;
  
         if (error)
                 return (error);
         dvp = nd.ni_dvp;
         vp = nd.ni_vp;
  
-       if (vp != NULL) {
-               error = EEXIST;
-               goto out;
-       }
+       if (vp != NULL) {
+               error = EEXIST;
+               goto out;
+       }
+
+       batched = vnode_compound_mkdir_available(dvp);
  
         VATTR_SET(vap, va_type, VDIR);
  
         VATTR_SET(vap, va_type, VDIR);
-   
-#if CONFIG_MACF
-       error = mac_vnode_check_create(ctx,
-           nd.ni_dvp, &nd.ni_cnd, vap);
-       if (error)
+
+       /*
+        * XXX
+        * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
+        * only get EEXIST or EISDIR for existing path components, and not that it could see
+        * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
+        * it will fail in a spurious manner.  Need to figure out if this is valid behavior.
+        */
+       if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
+               if (error == EACCES || error == EPERM) {
+                       int error2;
+
+                       nameidone(&nd);
+                       vnode_put(dvp);
+                       dvp = NULLVP;
+
+                       /*
+                        * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
+                        * rather than EACCES if the target exists.
+                        */
+                       NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
+                                       path, ctx);
+                       error2 = nameiat(&nd, fd);
+                       if (error2) {
+                               goto out;
+                       } else {
+                               vp = nd.ni_vp;
+                               error = EEXIST;
+                               goto out;
+                       }
+               }
+
                 goto out;
                 goto out;
-#endif
+       }
+
+       /*
+        * make the directory
+        */
+       if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
+               if (error == EKEEPLOOKING) {
+                       nd.ni_vp = vp;
+                       goto continue_lookup;
+               }
+
+               goto out;
+       }
  
  
-       /* authorize addition of a directory to the parent */
-       if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
-               goto out;
-       
-   
-       /* make the directory */
-       if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0)
-               goto out;
-               
         // Make sure the name & parent pointers are hooked up
         if (vp->v_name == NULL)
                 update_flags |= VNODE_UPDATE_NAME;
         // Make sure the name & parent pointers are hooked up
         if (vp->v_name == NULL)
                 update_flags |= VNODE_UPDATE_NAME;
@@ -5734,8 +7230,9 @@ out:
         nameidone(&nd);
  
         if (vp)
         nameidone(&nd);
  
         if (vp)
-               vnode_put(vp);
-       vnode_put(dvp);
+               vnode_put(vp);
+       if (dvp)
+               vnode_put(dvp);
  
         return (error);
  }
  
         return (error);
  }
@@ -5745,12 +7242,12 @@ out:
   *
   * Parameters:    p                       Process requesting to create the directory
   *                uap                     User argument descriptor (see below)
   *
   * Parameters:    p                       Process requesting to create the directory
   *                uap                     User argument descriptor (see below)
- *                retval                  (ignored) 
+ *                retval                  (ignored)
   *
   * Indirect:      uap->path               Path of directory to create
   *                uap->mode               Access permissions to set
   *                uap->xsecurity          ACL to set
   *
   * Indirect:      uap->path               Path of directory to create
   *                uap->mode               Access permissions to set
   *                uap->xsecurity          ACL to set
- *                
+ *
   * Returns:        0                      Success
   *                !0                      Not success
   *
   * Returns:        0                      Success
   *                !0                      Not success
   *
@@ -5770,11 +7267,12 @@ mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retv
                 return ciferror;
  
         VATTR_INIT(&va);
                 return ciferror;
  
         VATTR_INIT(&va);
-       VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
+       VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
         if (xsecdst != NULL)
                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
  
         if (xsecdst != NULL)
                 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
  
-       ciferror = mkdir1(vfs_context_current(), uap->path, &va);
+       ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
+           UIO_USERSPACE);
         if (xsecdst != NULL)
                 kauth_filesec_free(xsecdst);
         return ciferror;
         if (xsecdst != NULL)
                 kauth_filesec_free(xsecdst);
         return ciferror;
@@ -5786,169 +7284,211 @@ mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
         struct vnode_attr va;
  
         VATTR_INIT(&va);
         struct vnode_attr va;
  
         VATTR_INIT(&va);
-       VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
+       VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
  
  
-       return(mkdir1(vfs_context_current(), uap->path, &va));
+       return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
+           UIO_USERSPACE));
  }
  
  }
  
-/*
- * Remove a directory file.
- */
-/* ARGSUSED */
  int
  int
-rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
+mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
+{
+       struct vnode_attr va;
+       vfs_context_t ctx;
+
+       /* Requested permission bits with the process creation mask removed. */
+       VATTR_INIT(&va);
+       VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
+
+       /* uap->path is a user-space path; uap->fd is handed to mkdir1at(). */
+       ctx = vfs_context_current();
+       return (mkdir1at(ctx, uap->path, &va, uap->fd, UIO_USERSPACE));
+}
+
+static int
+rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
+    enum uio_seg segflg)
  {
         vnode_t vp, dvp;
         int error;
         struct nameidata nd;
  {
         vnode_t vp, dvp;
         int error;
         struct nameidata nd;
-       vfs_context_t ctx = vfs_context_current();
+       char     *path = NULL;
+       int       len=0;
+       int has_listeners = 0;
+       int need_event = 0;
+       int truncated = 0;
+#if CONFIG_FSE
+       struct vnode_attr va;
+#endif /* CONFIG_FSE */
+       struct vnode_attr *vap = NULL;
+       int batched;
  
         int restart_flag;
  
         int restart_flag;
-       uint32_t oldvp_id = UINT32_MAX;
  
  
-       /* 
+       /*
          * This loop exists to restart rmdir in the unlikely case that two
          * processes are simultaneously trying to remove the same directory
          * containing orphaned appleDouble files.
          */
         do {
          * This loop exists to restart rmdir in the unlikely case that two
          * processes are simultaneously trying to remove the same directory
          * containing orphaned appleDouble files.
          */
         do {
+               NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
+                   segflg, dirpath, ctx);
+               nd.ni_flag = NAMEI_COMPOUNDRMDIR;
+continue_lookup:
                 restart_flag = 0;
                 restart_flag = 0;
+               vap = NULL;
  
  
-               NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1, 
-                               UIO_USERSPACE, uap->path, ctx);
-               error = namei(&nd);
+               error = nameiat(&nd, fd);
                 if (error)
                         return (error);
  
                 dvp = nd.ni_dvp;
                 vp = nd.ni_vp;
  
                 if (error)
                         return (error);
  
                 dvp = nd.ni_dvp;
                 vp = nd.ni_vp;
  
+               if (vp) {
+                       batched = vnode_compound_rmdir_available(vp);
  
  
-               /*
-                * If being restarted check if the new vp
-                * still has the same v_id.
-                */
-               if (oldvp_id != UINT32_MAX && oldvp_id != vp->v_id) {
-                       error = ENOENT;
-                       goto out;
-               }
+                       if (vp->v_flag & VROOT) {
+                               /*
+                                * The root of a mounted filesystem cannot be deleted.
+                                */
+                               error = EBUSY;
+                               goto out;
+                       }
  
  
-               if (vp->v_type != VDIR) {
-                       /*
-                        * rmdir only deals with directories
-                        */
-                       error = ENOTDIR;
-               } else if (dvp == vp) {
                         /*
                         /*
-                        * No rmdir "." please.
+                        * Removed a check here; we used to abort if vp's vid
+                        * was not the same as what we'd seen the last time around.
+                        * I do not think that check was valid, because if we retry
+                        * and all dirents are gone, the directory could legitimately
+                        * be recycled but still be present in a situation where we would
+                        * have had permission to delete.  Therefore, we won't make
+                        * an effort to preserve that check now that we may not have a
+                        * vp here.
                          */
                          */
-                       error = EINVAL;
-               } else if (vp->v_flag & VROOT) {
-                       /*
-                        * The root of a mounted filesystem cannot be deleted.
-                        */
-                       error = EBUSY;
+
+                       if (!batched) {
+                               error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
+                               if (error) {
+                                       goto out;
+                               }
+                       }
                 } else {
                 } else {
-#if CONFIG_MACF
-                       error = mac_vnode_check_unlink(ctx, dvp,
-                                       vp, &nd.ni_cnd);
-                       if (!error)
-#endif
-                               error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, ctx);
+                       batched = 1;
+
+                       if (!vnode_compound_rmdir_available(dvp)) {
+                               panic("No error, but no compound rmdir?");
+                       }
                 }
                 }
-               if (!error) {
-                       char     *path = NULL;
-                       int       len=0;
-                       int has_listeners = 0;
-                       int need_event = 0;
-                       int truncated = 0;
+
  #if CONFIG_FSE
  #if CONFIG_FSE
-                       fse_info  finfo;
+               fse_info  finfo;
  
  
-                       need_event = need_fsevent(FSE_DELETE, dvp);
-                       if (need_event) {
+               need_event = need_fsevent(FSE_DELETE, dvp);
+               if (need_event) {
+                       if (!batched) {
                                 get_fse_info(vp, &finfo, ctx);
                                 get_fse_info(vp, &finfo, ctx);
+                       } else {
+                               error = vfs_get_notify_attributes(&va);
+                               if (error) {
+                                       goto out;
+                               }
+
+                               vap = &va;
                         }
                         }
+               }
  #endif
  #endif
-                       has_listeners = kauth_authorize_fileop_has_listeners();
-                       if (need_event || has_listeners) {
+               has_listeners = kauth_authorize_fileop_has_listeners();
+               if (need_event || has_listeners) {
+                       if (path == NULL) {
                                 GET_PATH(path);
                                 if (path == NULL) {
                                         error = ENOMEM;
                                 GET_PATH(path);
                                 if (path == NULL) {
                                         error = ENOMEM;
-                                       goto out;
-                               }
-
-                               len = safe_getpath(vp, NULL, path, MAXPATHLEN, &truncated);
-#if CONFIG_FSE
-                               if (truncated) {
-                                       finfo.mode |= FSE_TRUNCATED_PATH;
+                                       goto out;
                                 }
                                 }
-#endif
                         }
  
                         }
  
-                       error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
-
-                       /*
-                        * Special case to remove orphaned AppleDouble
-                        * files. I don't like putting this in the kernel,
-                        * but carbon does not like putting this in carbon either,
-                        * so here we are.
-                        */
-                       if (error == ENOTEMPTY) {
-                               error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
-                               if (error == EBUSY) {
-                                       oldvp_id = vp->v_id;
-                                       goto out;
-                               }
+                       len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
+#if CONFIG_FSE
+                       if (truncated) {
+                               finfo.mode |= FSE_TRUNCATED_PATH;
+                       }
+#endif
+               }
  
  
+               error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
+               nd.ni_vp = vp;
+               if (vp == NULLVP) {
+                       /* Couldn't find a vnode */
+                       goto out;
+               }
  
  
-                               /*
-                                * Assuming everything went well, we will try the RMDIR again 
-                                */
-                               if (!error)
-                                       error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
+               if (error == EKEEPLOOKING) {
+                       goto continue_lookup;
+               }
+#if CONFIG_APPLEDOUBLE
+               /*
+                * Special case to remove orphaned AppleDouble
+                * files. I don't like putting this in the kernel,
+                * but carbon does not like putting this in carbon either,
+                * so here we are.
+                */
+               if (error == ENOTEMPTY) {
+                       error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
+                       if (error == EBUSY) {
+                               goto out;
                         }
  
                         }
  
+
                         /*
                         /*
-                        * Call out to allow 3rd party notification of delete. 
-                        * Ignore result of kauth_authorize_fileop call.
+                        * Assuming everything went well, we will try the RMDIR again
                          */
                          */
-                       if (!error) {
-                               if (has_listeners) {
-                                       kauth_authorize_fileop(vfs_context_ucred(ctx), 
-                                                       KAUTH_FILEOP_DELETE, 
-                                                       (uintptr_t)vp,
-                                                       (uintptr_t)path);
-                               }
+                       if (!error)
+                               error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
+               }
+#endif /* CONFIG_APPLEDOUBLE */
+               /*
+                * Call out to allow 3rd party notification of delete.
+                * Ignore result of kauth_authorize_fileop call.
+                */
+               if (!error) {
+                       if (has_listeners) {
+                               kauth_authorize_fileop(vfs_context_ucred(ctx),
+                                               KAUTH_FILEOP_DELETE,
+                                               (uintptr_t)vp,
+                                               (uintptr_t)path);
+                       }
  
  
-                               if (vp->v_flag & VISHARDLINK) {
-                                   // see the comment in unlink1() about why we update
-                                   // the parent of a hard link when it is removed
-                                   vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
-                               }
+                       if (vp->v_flag & VISHARDLINK) {
+                               // see the comment in unlink1() about why we update
+                               // the parent of a hard link when it is removed
+                               vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
+                       }
  
  #if CONFIG_FSE
  
  #if CONFIG_FSE
-                               if (need_event) {
-                                       add_fsevent(FSE_DELETE, ctx,
-                                                       FSE_ARG_STRING, len, path,
-                                                       FSE_ARG_FINFO, &finfo,
-                                                       FSE_ARG_DONE);
+                       if (need_event) {
+                               if (vap) {
+                                       vnode_get_fse_info_from_vap(vp, &finfo, vap);
                                 }
                                 }
-#endif
+                               add_fsevent(FSE_DELETE, ctx,
+                                               FSE_ARG_STRING, len, path,
+                                               FSE_ARG_FINFO, &finfo,
+                                               FSE_ARG_DONE);
                         }
                         }
-                       if (path != NULL)
-                               RELEASE_PATH(path);
+#endif
                 }
  
  out:
                 }
  
  out:
+               if (path != NULL) {
+                       RELEASE_PATH(path);
+                       path = NULL;
+               }
                 /*
                  * nameidone has to happen before we vnode_put(dvp)
                  * since it may need to release the fs_nodelock on the dvp
                  */
                 nameidone(&nd);
                 /*
                  * nameidone has to happen before we vnode_put(dvp)
                  * since it may need to release the fs_nodelock on the dvp
                  */
                 nameidone(&nd);
-
                 vnode_put(dvp);
                 vnode_put(dvp);
-               vnode_put(vp);
+
+               if (vp)
+                       vnode_put(vp);
  
                 if (restart_flag == 0) {
                         wakeup_one((caddr_t)vp);
  
                 if (restart_flag == 0) {
                         wakeup_one((caddr_t)vp);
@@ -5962,22 +7502,34 @@ out:
  
  }
  
  
  }
  
+/*
+ * rmdir system call entry point: remove a directory file.
+ *
+ * Hands uap->path (converted to a user address) to rmdirat_internal()
+ * with AT_FDCWD as the directory descriptor and returns its result.
+ */
+/* ARGSUSED */
+int
+rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
+{
+       vfs_context_t ctx = vfs_context_current();
+       user_addr_t dirpath = CAST_USER_ADDR_T(uap->path);
+
+       return (rmdirat_internal(ctx, AT_FDCWD, dirpath, UIO_USERSPACE));
+}
+
  /* Get direntry length padded to 8 byte alignment */
  #define DIRENT64_LEN(namlen) \
         ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
  
  /* Get direntry length padded to 8 byte alignment */
  #define DIRENT64_LEN(namlen) \
         ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
  
-static errno_t 
+errno_t
  vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
                  int *numdirent, vfs_context_t ctxp)
  {
         /* Check if fs natively supports VNODE_READDIR_EXTENDED */
  vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
                  int *numdirent, vfs_context_t ctxp)
  {
         /* Check if fs natively supports VNODE_READDIR_EXTENDED */
-       if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) {
+       if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) && 
+                  ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0))  {
                 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
         } else {
                 size_t bufsize;
                 void * bufptr;
                 uio_t auio;
                 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
         } else {
                 size_t bufsize;
                 void * bufptr;
                 uio_t auio;
-               struct direntry entry64;
+               struct direntry *entry64;
                 struct dirent *dep;
                 int bytesread;
                 int error;
                 struct dirent *dep;
                 int bytesread;
                 int error;
@@ -5995,7 +7547,7 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
                  * use 32K in the MIN(), but we use magic number 87371 to
                  * prevent uio_resid() * 3 / 8 from overflowing. 
                  */
                  * use 32K in the MIN(), but we use magic number 87371 to
                  * prevent uio_resid() * 3 / 8 from overflowing. 
                  */
-               bufsize = 3 * MIN(uio_resid(uio), 87371) / 8;
+               bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
                 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
                 if (bufptr == NULL) {
                         return ENOMEM;
                 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
                 if (bufptr == NULL) {
                         return ENOMEM;
@@ -6005,58 +7557,348 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
                 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
                 auio->uio_offset = uio->uio_offset;
  
                 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
                 auio->uio_offset = uio->uio_offset;
  
-               error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
+               error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
+
+               dep = (struct dirent *)bufptr;
+               bytesread = bufsize - uio_resid(auio);
+
+               MALLOC(entry64, struct direntry *, sizeof(struct direntry),
+                      M_TEMP, M_WAITOK);
+               /*
+                * Convert all the entries and copy them out to user's buffer.
+                */
+               while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
+                       size_t  enbufsize = DIRENT64_LEN(dep->d_namlen);
+
+                       bzero(entry64, enbufsize);
+                       /* Convert a dirent to a dirent64. */
+                       entry64->d_ino = dep->d_ino;
+                       entry64->d_seekoff = 0;
+                       entry64->d_reclen = enbufsize;
+                       entry64->d_namlen = dep->d_namlen;
+                       entry64->d_type = dep->d_type;
+                       bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
+
+                       /* Move to next entry. */
+                       dep = (struct dirent *)((char *)dep + dep->d_reclen);
+
+                       /* Copy entry64 to user's buffer. */
+                       error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
+               }
+
+               /* Update the real offset using the offset we got from VNOP_READDIR. */
+               if (error == 0) {
+                       uio->uio_offset = auio->uio_offset;
+               }
+               uio_free(auio);
+               FREE(bufptr, M_TEMP);
+               FREE(entry64, M_TEMP);
+               return (error);
+       }
+}
+
+/* Upper bound (128 MiB) applied to the caller-supplied buffer size. */
+#define GETDIRENTRIES_MAXBUFSIZE       (128 * 1024 * 1024U)
+
+/*
+ * Read a block of directory entries in a file system independent format.
+ *
+ * Parameters:    fd          Descriptor of the directory to read
+ *                bufp        User address of the destination buffer
+ *                bufsize     Size of that buffer; clamped to
+ *                            GETDIRENTRIES_MAXBUFSIZE
+ *                bytesread   Out: number of bytes placed in the buffer
+ *                            (written on success only)
+ *                offset      Out, may be NULL: descriptor offset the
+ *                            final read started from (written on
+ *                            success only)
+ *                flags       VNODE_READDIR_EXTENDED selects
+ *                            vnode_readdir64(); otherwise VNOP_READDIR()
+ *                            is called with no flags
+ *
+ * Returns:       0           Success
+ *                EBADF       Descriptor is not open for reading
+ *                EINVAL      Descriptor does not refer to a directory
+ *                !0          Error from fp_getfvp(), the MAC checks,
+ *                            vnode_getwithref(), the readdir call or
+ *                            union_dircheckp()
+ *
+ * Notes:         The descriptor's fg_offset is updated from the uio
+ *                right after the readdir call, even when that call
+ *                returns an error.
+ *                Every exit taken after vnode_getwithref() succeeds
+ *                drops that iocount with vnode_put() before leaving.
+ */
+static int
+getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
+                     off_t *offset, int flags)
+{
+       vnode_t vp;
+       struct vfs_context context = *vfs_context_current();    /* local copy */
+       struct fileproc *fp;
+       uio_t auio;
+       int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+       off_t loff;
+       int error, eofflag, numdirent;
+       char uio_buf[ UIO_SIZEOF(1) ];
+
+       error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
+       if (error) {
+               return (error);
+       }
+       if ((fp->f_fglob->fg_flag & FREAD) == 0) {
+               AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+               error = EBADF;
+               goto out;
+       }
+
+       if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
+               bufsize = GETDIRENTRIES_MAXBUFSIZE;
+
+#if CONFIG_MACF
+       error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
+       if (error)
+               goto out;
+#endif
+       if ( (error = vnode_getwithref(vp)) ) {
+               goto out;
+       }
+       AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+unionread:
+       if (vp->v_type != VDIR) {
+               (void)vnode_put(vp);
+               error = EINVAL;
+               goto out;
+       }
+
+#if CONFIG_MACF
+       error = mac_vnode_check_readdir(&context, vp);
+       if (error != 0) {
+               (void)vnode_put(vp);
+               goto out;
+       }
+#endif /* MAC */
+
+       /* Start from the descriptor's current offset; uio_buf backs the uio itself. */
+       loff = fp->f_fglob->fg_offset;
+       auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+       uio_addiov(auio, bufp, bufsize);
+
+       if (flags & VNODE_READDIR_EXTENDED) {
+               error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
+               fp->f_fglob->fg_offset = uio_offset(auio);
+       } else {
+               error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
+               fp->f_fglob->fg_offset = uio_offset(auio);
+       }
+       if (error) {
+               (void)vnode_put(vp);
+               goto out;
+       }
+
+       /* Nothing was transferred: see whether another union layer should be read. */
+       if ((user_ssize_t)bufsize == uio_resid(auio)){
+               if (union_dircheckp) {
+                       error = union_dircheckp(&vp, fp, &context);
+                       if (error == -1)
+                               goto unionread;
+                       if (error) {
+                               /*
+                                * Drop the iocount here as every other
+                                * error exit past vnode_getwithref() does;
+                                * a bare "goto out" would leak it.
+                                */
+                               (void)vnode_put(vp);
+                               goto out;
+                       }
+               }
+
+               if ((vp->v_mount->mnt_flag & MNT_UNION)) {
+                       struct vnode *tvp = vp;
+                       if (lookup_traverse_union(tvp, &vp, &context) == 0) {
+                               /* Switch the descriptor over to the vnode found and restart at offset 0. */
+                               vnode_ref(vp);
+                               fp->f_fglob->fg_data = (caddr_t) vp;
+                               fp->f_fglob->fg_offset = 0;
+                               vnode_rele(tvp);
+                               vnode_put(tvp);
+                               goto unionread;
+                       }
+                       vp = tvp;
+               }
+       }
+
+       vnode_put(vp);
+       if (offset) {
+               *offset = loff;
+       }
+
+       *bytesread = bufsize - uio_resid(auio);
+out:
+       /* Every exit taken after fp_getfvp() succeeds comes through here. */
+       file_drop(fd);
+       return (error);
+}
+
+
+/*
+ * getdirentries system call entry point.
+ *
+ * Reads entries through getdirentries_common() with no readdir flags.
+ * On success the starting offset is copied out to uap->basep, as a
+ * user64_long_t for a 64-bit process and a user32_long_t otherwise,
+ * and *retval receives the byte count.  *retval is stored even when
+ * that copyout fails; the copyout's error is what gets returned.
+ */
+int
+getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
+{
+       off_t dir_offset;
+       ssize_t nread;
+       int error;
+
+       AUDIT_ARG(fd, uap->fd);
+       error = getdirentries_common(uap->fd, uap->buf, uap->count, &nread,
+           &dir_offset, 0);
+       if (error != 0) {
+               return (error);
+       }
+
+       if (proc_is64bit(p)) {
+               user64_long_t base64 = (user64_long_t)dir_offset;
+
+               error = copyout((caddr_t)&base64, uap->basep, sizeof(user64_long_t));
+       } else {
+               user32_long_t base32 = (user32_long_t)dir_offset;
+
+               error = copyout((caddr_t)&base32, uap->basep, sizeof(user32_long_t));
+       }
+       *retval = nread;
+
+       return (error);
+}
+
+/*
+ * getdirentries64 system call entry point.
+ *
+ * Reads entries through getdirentries_common() with
+ * VNODE_READDIR_EXTENDED set.  On success *retval receives the byte
+ * count and the starting offset is copied out to uap->position as an
+ * off_t; the result of that copyout is the return value.
+ */
+int
+getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
+{
+       off_t position;
+       ssize_t nread;
+       int error;
+
+       AUDIT_ARG(fd, uap->fd);
+       error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &nread,
+           &position, VNODE_READDIR_EXTENDED);
+       if (error != 0) {
+               return (error);
+       }
+
+       /* The byte count is stored first, so it is set even if the copyout fails. */
+       *retval = nread;
+       return (copyout((caddr_t)&position, uap->position, sizeof(off_t)));
+}
+
+
+/*
+ * Set the mode mask for creation of filesystem nodes.
+ * XXX implement xsecurity
+ *
+ * Under the process fd lock, stores the previous fd_cmask in *retval
+ * and replaces it with (newmask & ALLPERMS).  The fsec argument is
+ * accepted but not used.  Always returns 0.
+ */
+#define UMASK_NOXSECURITY       (void *)1      /* leave existing xsecurity alone */
+static int
+umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
+{
+       AUDIT_ARG(mask, newmask);
+
+       proc_fdlock(p);
+       {
+               struct filedesc *fdesc = p->p_fd;
+
+               /* Report the old mask before overwriting it. */
+               *retval = fdesc->fd_cmask;
+               fdesc->fd_cmask = newmask & ALLPERMS;
+       }
+       proc_fdunlock(p);
+
+       return (0);
+}
+
+/*
+ * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
+ *
+ * Parameters:    p                       Process requesting to set the umask
+ *                uap                     User argument descriptor (see below)
+ *                retval                  Previous umask of the process (parameter p)
+ *
+ * Indirect:      uap->newmask            umask to set
+ *                uap->xsecurity          ACL to set
+ *
+ * Returns:        0                      Success
+ *                !0                      Not success
+ *
+ * Notes:         When uap->xsecurity is not USER_ADDR_NULL it is copied in
+ *                with kauth_copyinfilesec(); a failure there is returned
+ *                before the mask is touched.  The copy is released with
+ *                kauth_filesec_free() after umask1() returns.
+ *                umask1() declares its filesec parameter __unused, so the
+ *                copied-in ACL is not used beyond being copied in and freed.
+ *                The else branch re-assigns the KAUTH_FILESEC_NONE value
+ *                that xsecdst already holds.
+ */
+int
+umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
+{
+       int ciferror;
+       kauth_filesec_t xsecdst;
+
+       xsecdst = KAUTH_FILESEC_NONE;
+       if (uap->xsecurity != USER_ADDR_NULL) {
+               if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
+                       return ciferror;
+       } else {
+               xsecdst = KAUTH_FILESEC_NONE;
+       }
  
  
-               dep = (struct dirent *)bufptr;
-               bytesread = bufsize - uio_resid(auio);
+       ciferror = umask1(p, uap->newmask, xsecdst, retval);
  
  
-               /*
-                * Convert all the entries and copy them out to user's buffer.
-                */
-               while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
-                       /* Convert a dirent to a dirent64. */
-                       entry64.d_ino = dep->d_ino;
-                       entry64.d_seekoff = 0;
-                       entry64.d_reclen = DIRENT64_LEN(dep->d_namlen);
-                       entry64.d_namlen = dep->d_namlen;
-                       entry64.d_type = dep->d_type;
-                       bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1);
+       if (xsecdst != KAUTH_FILESEC_NONE)
+               kauth_filesec_free(xsecdst);
+       return ciferror;
+}
  
  
-                       /* Move to next entry. */
-                       dep = (struct dirent *)((char *)dep + dep->d_reclen);
+/*
+ * umask: set the file creation mask of process p to uap->newmask and return
+ * the previous mask through *retval.  No extended security is supplied, so
+ * UMASK_NOXSECURITY is passed to umask1().
+ */
+int
+umask(proc_t p, struct umask_args *uap, int32_t *retval)
+{
+       int status;
+
+       status = umask1(p, uap->newmask, UMASK_NOXSECURITY, retval);
+       return (status);
+}
  
  
-                       /* Copy entry64 to user's buffer. */
-                       error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio);
-               }
+/*
+ * Void all references to file by ripping underlying filesystem
+ * away from vnode.
+ *
+ * Parameters:    p                       Calling process (p_acflag is passed to suser())
+ *                uap->path               User-space path of the node to revoke;
+ *                                        looked up with FOLLOW
+ *                retval                  Unused
+ *
+ * Returns:        0                      Success
+ *                ENOTSUP                 The vnode is neither a character nor
+ *                                        a block device
+ *                EBUSY                   Block device that is mounted on
+ *                !0                      Error from namei(), the MAC check,
+ *                                        vnode_getattr() or suser()
+ *
+ * Notes:         The caller must own the vnode (va_uid) or pass suser().
+ *                VNOP_REVOKE() is only issued when the vnode has a use count
+ *                or is aliased, and its return value is not captured.
+ */
+/* ARGSUSED */
+int
+revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
+{
+       vnode_t vp;
+       struct vnode_attr va;
+       vfs_context_t ctx = vfs_context_current();
+       int error;
+       struct nameidata nd;
  
  
-               /* Update the real offset using the offset we got from VNOP_READDIR. */
-               if (error == 0) {
-                       uio->uio_offset = auio->uio_offset;
-               }
-               uio_free(auio);
-               FREE(bufptr, M_TEMP);
+       NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+              uap->path, ctx);
+       error = namei(&nd);
+       if (error)
                 return (error);
                 return (error);
+       vp = nd.ni_vp;
+
+       nameidone(&nd);
+
+       if (!(vnode_ischr(vp) || vnode_isblk(vp))) { /* only device nodes may be revoked */
+               error = ENOTSUP;
+               goto out;
+       }
+
+       if (vnode_isblk(vp) && vnode_ismountedon(vp)) { /* refuse a block device that is mounted on */
+               error = EBUSY;
+               goto out;
         }
         }
+
+#if CONFIG_MACF
+       error = mac_vnode_check_revoke(ctx, vp);
+       if (error)
+               goto out;
+#endif
+
+       VATTR_INIT(&va);
+       VATTR_WANTED(&va, va_uid);
+       if ((error = vnode_getattr(vp, &va, ctx)))
+               goto out;
+       if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid && /* non-owners must pass suser() */
+           (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
+               goto out;
+       if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
+               VNOP_REVOKE(vp, REVOKEALL, ctx); /* result is not stored in error */
+out:
+       vnode_put(vp); /* every path reaching here still holds the vnode obtained from namei() */
+       return (error);
  }
  
  }
  
+
  /*
  /*
- * Read a block of directory entries in a file system independent format.
+ *  HFS/HFS PLUS SPECIFIC SYSTEM CALLS
+ *  The following system calls are designed to support features
+ *  which are specific to the HFS & HFS Plus volume formats
   */
   */
-static int
-getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
-                     off_t *offset, int flags)
+
+
+/*
+ * Obtain attribute information on objects in a directory while enumerating
+ * the directory.
+ */
+/* ARGSUSED */
+int
+getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
  {
         vnode_t vp;
  {
         vnode_t vp;
-       struct vfs_context context = *vfs_context_current();    /* local copy */
         struct fileproc *fp;
         struct fileproc *fp;
-       uio_t auio;
-       int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
-       off_t loff;
-       int error, eofflag, numdirent;
+       uio_t auio = NULL;
+       int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+       uint32_t count, savecount;
+       uint32_t newstate;
+       int error, eofflag;
+       uint32_t loff;
+       struct attrlist attributelist; 
+       vfs_context_t ctx = vfs_context_current();
+       int fd = uap->fd;
         char uio_buf[ UIO_SIZEOF(1) ];
         char uio_buf[ UIO_SIZEOF(1) ];
+       kauth_action_t action;
  
  
-       error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
-       if (error) {
+       AUDIT_ARG(fd, fd);
+    
+       /* Get the attributes into kernel space */
+       if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
+               return(error);
+       }
+       if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
+               return(error);
+       }
+       savecount = count;
+       if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
                 return (error);
         }
         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
                 return (error);
         }
         if ((fp->f_fglob->fg_flag & FREAD) == 0) {
@@ -6065,14 +7907,18 @@ getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *byt
                 goto out;
         }
  
                 goto out;
         }
  
+
  #if CONFIG_MACF
  #if CONFIG_MACF
-       error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
+       error = mac_file_check_change_offset(vfs_context_ucred(ctx),
+           fp->f_fglob);
         if (error)
                 goto out;
  #endif
         if (error)
                 goto out;
  #endif
-       if ( (error = vnode_getwithref(vp)) ) {
+
+
+       if ( (error = vnode_getwithref(vp)) )
                 goto out;
                 goto out;
-       }
+
         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
  
  unionread:
         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
  
  unionread:
@@ -6083,904 +7929,1670 @@ unionread:
         }
  
  #if CONFIG_MACF
         }
  
  #if CONFIG_MACF
-       error = mac_vnode_check_readdir(&context, vp);
+       error = mac_vnode_check_readdir(ctx, vp);
         if (error != 0) {
                 (void)vnode_put(vp);
                 goto out;
         }
  #endif /* MAC */
  
         if (error != 0) {
                 (void)vnode_put(vp);
                 goto out;
         }
  #endif /* MAC */
  
+       /* set up the uio structure which will contain the users return buffer */
         loff = fp->f_fglob->fg_offset;
         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
         loff = fp->f_fglob->fg_offset;
         auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
-       uio_addiov(auio, bufp, bufsize);
+       uio_addiov(auio, uap->buffer, uap->buffersize);
+       
+       /*
+        * If the only item requested is file names, we can let that past with
+        * just LIST_DIRECTORY.  If they want any other attributes, that means
+        * they need SEARCH as well.
+        */
+       action = KAUTH_VNODE_LIST_DIRECTORY;
+       if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
+           attributelist.fileattr || attributelist.dirattr)
+               action |= KAUTH_VNODE_SEARCH;
+       
+       if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
  
  
-       if (flags & VNODE_READDIR_EXTENDED) {
-               error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
-               fp->f_fglob->fg_offset = uio_offset(auio);
-       } else {
-               error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
-               fp->f_fglob->fg_offset = uio_offset(auio);
+               /* Believe it or not, uap->options only has 32-bits of valid
+                * info, so truncate before extending again */
+
+               error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
+                               (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
         }
         }
+
         if (error) {
         if (error) {
-               (void)vnode_put(vp);
+               (void) vnode_put(vp);
+               goto out;
+       }
+
+       /*
+        * If we've got the last entry of a directory in a union mount
+        * then reset the eofflag and pretend there's still more to come.
+        * The next call will again set eofflag and the buffer will be empty,
+        * so traverse to the underlying directory and do the directory
+        * read there.
+        */
+       if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
+               if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
+                       eofflag = 0;
+               } else {                                                // Empty buffer
+                       struct vnode *tvp = vp;
+                       if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
+                               vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
+                               fp->f_fglob->fg_data = (caddr_t) vp;
+                               fp->f_fglob->fg_offset = 0; // reset index for new dir
+                               count = savecount;
+                               vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
+                               vnode_put(tvp);
+                               goto unionread;
+                       }
+                       vp = tvp;
+               }
+       }
+
+       (void)vnode_put(vp);
+
+       if (error) 
+               goto out;
+       fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
+
+       if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
+               goto out;
+       if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
+               goto out;
+       if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
+               goto out;
+
+       *retval = eofflag;  /* similar to getdirentries */
+       error = 0;
+out:
+       file_drop(fd);
+       return (error); /* return error earlier, an retval of 0 or 1 now */
+
+} /* end of getdirentriesattr system call */
+
+/*
+* Exchange data between two files
+*
+* Looks up uap->path1 and uap->path2 (following symlinks unless
+* FSOPT_NOFOLLOW is set in uap->options) and asks the file system to
+* exchange them with VNOP_EXCHANGE().  On success the cached v_name and
+* v_parent of the two vnodes are swapped under the name cache lock, and
+* kauth fileop listeners / fsevents are notified when the paths were
+* collected beforehand.
+*
+* Returns:  0        Success
+*           EINVAL   Both paths name the same vnode, or either vnode is
+*                    not a regular file
+*           EXDEV    The vnodes are on different mounts
+*           ENOMEM   A path buffer could not be obtained
+*           !0       Error from namei(), the MAC check, vnode_authorize()
+*                    or VNOP_EXCHANGE()
+*/
+
+/* ARGSUSED */
+int
+exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
+{
+
+       struct nameidata fnd, snd;
+       vfs_context_t ctx = vfs_context_current();
+       vnode_t fvp;
+       vnode_t svp;
+       int error;
+       u_int32_t nameiflags;
+       char *fpath = NULL;
+       char *spath = NULL;
+       int   flen=0, slen=0;
+       int from_truncated=0, to_truncated=0;
+#if CONFIG_FSE
+       fse_info f_finfo, s_finfo;
+#endif
+       
+       nameiflags = 0;
+       if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; /* follow symlinks unless the caller opted out */
+
+       NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
+              UIO_USERSPACE, uap->path1, ctx);
+
+       error = namei(&fnd);
+       if (error)
+               goto out2;
+
+       nameidone(&fnd);
+       fvp = fnd.ni_vp;
+
+       NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2, 
+               UIO_USERSPACE, uap->path2, ctx);
+
+       error = namei(&snd);
+       if (error) {
+               vnode_put(fvp); /* svp was never obtained, so only fvp needs releasing */
+               goto out2;
+       }
+       nameidone(&snd);
+       svp = snd.ni_vp;
+
+       /*
+        * if the files are the same, return an inval error
+        */
+       if (svp == fvp) {
+               error = EINVAL;
+               goto out;
+       } 
+
+       /*
+        * if the files are on different volumes, return an error
+        */
+       if (svp->v_mount != fvp->v_mount) {
+               error = EXDEV;
+               goto out;
+       }
+
+       /* If they're not files, return an error */
+       if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
+               error = EINVAL;
+               goto out;
+       }
+
+#if CONFIG_MACF
+       error = mac_vnode_check_exchangedata(ctx,
+           fvp, svp);
+       if (error)
+               goto out;
+#endif
+       if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
+           ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
                 goto out;
                 goto out;
-       }
  
  
-       if ((user_ssize_t)bufsize == uio_resid(auio)){
-               if (union_dircheckp) {
-                       error = union_dircheckp(&vp, fp, &context);
-                       if (error == -1)
-                               goto unionread;
-                       if (error)
-                               goto out;
+       if (
+#if CONFIG_FSE
+       need_fsevent(FSE_EXCHANGE, fvp) || 
+#endif
+       kauth_authorize_fileop_has_listeners()) { /* paths are only collected when someone will be notified */
+               GET_PATH(fpath);
+               GET_PATH(spath);
+               if (fpath == NULL || spath == NULL) {
+                       error = ENOMEM;
+                       goto out;
                 }
  
                 }
  
-               if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) {
-                       struct vnode *tvp = vp;
-                       vp = vp->v_mount->mnt_vnodecovered;
-                       vnode_getwithref(vp);
-                       vnode_ref(vp);
-                       fp->f_fglob->fg_data = (caddr_t) vp;
-                       fp->f_fglob->fg_offset = 0;
-                       vnode_rele(tvp);
-                       vnode_put(tvp);
-                       goto unionread;
+               flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
+               slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
+               
+#if CONFIG_FSE
+               get_fse_info(fvp, &f_finfo, ctx);
+               get_fse_info(svp, &s_finfo, ctx);
+               if (from_truncated || to_truncated) {
+                       // set it here since only the f_finfo gets reported up to user space
+                       f_finfo.mode |= FSE_TRUNCATED_PATH;
                 }
                 }
+#endif
         }
         }
+       /* Ok, make the call */
+       error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
  
  
-       vnode_put(vp);
-       if (offset) {
-               *offset = loff;
+       if (error == 0) {
+           const char *tmpname;
+
+           if (fpath != NULL && spath != NULL) {
+                   /* call out to allow 3rd party notification of exchangedata. 
+                    * Ignore result of kauth_authorize_fileop call.
+                    */
+                   kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE, 
+                                          (uintptr_t)fpath, (uintptr_t)spath);
+           }
+           name_cache_lock();
+
+           tmpname     = fvp->v_name; /* swap the two vnodes' cached names */
+           fvp->v_name = svp->v_name;
+           svp->v_name = tmpname;
+           
+           if (fvp->v_parent != svp->v_parent) { /* likewise swap the cached parents when they differ */
+               vnode_t tmp;
+
+               tmp           = fvp->v_parent;
+               fvp->v_parent = svp->v_parent;
+               svp->v_parent = tmp;
+           }
+           name_cache_unlock();
+
+#if CONFIG_FSE
+           if (fpath != NULL && spath != NULL) {
+                   add_fsevent(FSE_EXCHANGE, ctx,
+                               FSE_ARG_STRING, flen, fpath,
+                               FSE_ARG_FINFO, &f_finfo,
+                               FSE_ARG_STRING, slen, spath,
+                               FSE_ARG_FINFO, &s_finfo,
+                               FSE_ARG_DONE);
+           }
+#endif
         }
         }
-       
-       *bytesread = bufsize - uio_resid(auio);
+
  out:
  out:
-       file_drop(fd);
-       return (error);
+       if (fpath != NULL)
+               RELEASE_PATH(fpath);
+       if (spath != NULL)
+               RELEASE_PATH(spath);
+       vnode_put(svp);
+       vnode_put(fvp);
+out2:
+        return (error);
  }
  
  }
  
+/*
+ * Return (in MB) the amount of freespace on the given vnode's volume.
+ *
+ * The mount's vfsstat is refreshed first (VFS_USER_EVENT); the return
+ * value of vfs_update_vfsstat() is not checked.  The product
+ * f_bavail * f_bsize is computed in 64 bits and shifted right by 20
+ * (bytes to MB); the result is then narrowed to the uint32_t return type.
+ */
+uint32_t freespace_mb(vnode_t vp);
  
  
-int
-getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
+uint32_t
+freespace_mb(vnode_t vp)
  {
  {
-       off_t offset;
-       ssize_t bytesread;
-       int error;
+       vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT); 
+       return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
+               vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
+}
  
  
-       AUDIT_ARG(fd, uap->fd);
-       error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
+#if CONFIG_SEARCHFS
  
  
-       if (error == 0) {
-               if (proc_is64bit(p)) {
-                       user64_long_t base = (user64_long_t)offset;
-                       error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
-               } else {
-                       user32_long_t base = (user32_long_t)offset;
-                       error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
-               }
-               *retval = bytesread;
-       }
-       return (error);
-}
+/* ARGSUSED */
  
  int
  
  int
-getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
+searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
  {
  {
-       off_t offset;
-       ssize_t bytesread;
-       int error;
+       vnode_t vp, tvp;
+       int i, error=0;
+       int fserror = 0;
+       struct nameidata nd;
+       struct user64_fssearchblock searchblock;
+       struct searchstate *state;
+       struct attrlist *returnattrs;
+       struct timeval timelimit;
+       void *searchparams1,*searchparams2;
+       uio_t auio = NULL;
+       int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+       uint32_t nummatches;
+       int mallocsize;
+       uint32_t nameiflags;
+       vfs_context_t ctx = vfs_context_current();
+       char uio_buf[ UIO_SIZEOF(1) ];
  
  
-       AUDIT_ARG(fd, uap->fd);
-       error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
+       /* Start by copying in fsearchblock parameter list */
+    if (IS_64BIT_PROCESS(p)) {
+        error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
+        timelimit.tv_sec = searchblock.timelimit.tv_sec;
+        timelimit.tv_usec = searchblock.timelimit.tv_usec;
+    }
+    else {
+        struct user32_fssearchblock tmp_searchblock;
  
  
-       if (error == 0) {
-               *retval = bytesread;
-               error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
-       }
-       return (error);
-}
+        error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
+        // munge into 64-bit version
+        searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
+        searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
+        searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
+        searchblock.maxmatches = tmp_searchblock.maxmatches;
+               /* 
+                * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
+                * from a 32 bit long, and tv_usec is already a signed 32 bit int.
+                */
+        timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
+        timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
+        searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
+        searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
+        searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
+        searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
+        searchblock.searchattrs = tmp_searchblock.searchattrs;
+    }
+       if (error)
+               return(error);
  
  
+       /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.  
+        */
+       if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS || 
+               searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
+               return(EINVAL);
+       
+       /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
+       /* It all has to do into local memory and it's not that big so we might as well  put it all together. */
+       /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
+       /* block.                                                                                             */
+       /*                                                                                                    */
+       /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate        */
+       /*       due to the changes in rdar://problem/12438273.  That way if a 3rd party file system          */
+       /*       assumes the size is still 556 bytes it will continue to work                                 */
+                
+       mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
+               sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
  
  
-/*
- * Set the mode mask for creation of filesystem nodes.
- * XXX implement xsecurity
- */
-#define UMASK_NOXSECURITY       (void *)1      /* leave existing xsecurity alone */
-static int
-umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
-{
-       struct filedesc *fdp;
+       MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
  
  
-       AUDIT_ARG(mask, newmask);
-       proc_fdlock(p);
-       fdp = p->p_fd;
-       *retval = fdp->fd_cmask;
-       fdp->fd_cmask = newmask & ALLPERMS;
-       proc_fdunlock(p);
-       return (0);
-}
+       /* Now set up the various pointers to the correct place in our newly allocated memory */
  
  
-/*
- * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
- *
- * Parameters:    p                       Process requesting to set the umask
- *                uap                     User argument descriptor (see below)
- *                retval                  umask of the process (parameter p)
- *
- * Indirect:      uap->newmask            umask to set
- *                uap->xsecurity          ACL to set
- *                
- * Returns:        0                      Success
- *                !0                      Not success
- *
- */
-int
-umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
-{
-       int ciferror;
-       kauth_filesec_t xsecdst;
+       searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
+       returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
+       state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
  
  
-       xsecdst = KAUTH_FILESEC_NONE;
-       if (uap->xsecurity != USER_ADDR_NULL) {
-               if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
-                       return ciferror;
-       } else {
-               xsecdst = KAUTH_FILESEC_NONE;
-       }
+       /* Now copy in the stuff given our local variables. */
+
+       if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
+               goto freeandexit;
+
+       if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
+               goto freeandexit;
+
+       if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
+               goto freeandexit;
+               
+       if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
+               goto freeandexit;
+
+       /*
+        * When searching a union mount, need to set the
+        * start flag at the first call on each layer to
+        * reset state for the new volume.
+        */
+       if (uap->options & SRCHFS_START)
+               state->ss_union_layer = 0;
+       else 
+               uap->options |= state->ss_union_flags;
+       state->ss_union_flags = 0;
+
+       /*
+        * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
+        * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
+        * The KPI does not provide us the ability to pass in the length of the buffers searchparams1 
+        * and searchparams2. To obviate the need for all searchfs-supporting filesystems to 
+        * validate the user-supplied data offset of the attrreference_t, we'll do it here.
+        */
+
+       if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
+               attrreference_t* string_ref;
+               u_int32_t* start_length;
+               user64_size_t param_length;            
+
+               /* validate searchparams1 */
+               param_length = searchblock.sizeofsearchparams1;                                           
+               /* skip the word that specifies length of the buffer */
+               start_length= (u_int32_t*) searchparams1;
+               start_length= start_length+1;
+               string_ref= (attrreference_t*) start_length;
  
  
-       ciferror = umask1(p, uap->newmask, xsecdst, retval);
+               /* ensure no negative offsets or too big offsets */
+               if (string_ref->attr_dataoffset < 0 ) {
+                       error = EINVAL;
+                       goto freeandexit;               
+               }
+               if (string_ref->attr_length > MAXPATHLEN) {
+                       error = EINVAL;
+                       goto freeandexit;
+               }
+               
+               /* Check for pointer overflow in the string ref */
+               if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
+                       error = EINVAL;
+                       goto freeandexit;
+               }
  
  
-       if (xsecdst != KAUTH_FILESEC_NONE)
-               kauth_filesec_free(xsecdst);
-       return ciferror;
-}
+               if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
+                       error = EINVAL;
+                       goto freeandexit;
+               }
+               if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
+                       error = EINVAL;
+                       goto freeandexit;
+               }
+       }
  
  
-int
-umask(proc_t p, struct umask_args *uap, int32_t *retval)
-{
-       return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
-}
+       /* set up the uio structure which will contain the users return buffer */
+       auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+       uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
  
  
-/*
- * Void all references to file by ripping underlying filesystem
- * away from vnode.
- */
-/* ARGSUSED */
-int
-revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
-{
-       vnode_t vp;
-       struct vnode_attr va;
-       vfs_context_t ctx = vfs_context_current();
-       int error;
-       struct nameidata nd;
+       nameiflags = 0;
+       if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
+       NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
+              UIO_USERSPACE, uap->path, ctx);
  
  
-       NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctx);
         error = namei(&nd);
         if (error)
         error = namei(&nd);
         if (error)
-               return (error);
+               goto freeandexit;
         vp = nd.ni_vp;
         vp = nd.ni_vp;
-
         nameidone(&nd);
  
         nameidone(&nd);
  
-       if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
-               error = ENOTSUP;
-               goto out;
-       }
+       /*
+        * Switch to the root vnode for the volume
+        */
+       error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
+       vnode_put(vp);
+       if (error)
+               goto freeandexit;
+       vp = tvp;
  
  
-       if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
-               error = EBUSY;
-               goto out;
+       /*
+        * If it's a union mount, the path lookup takes
+        * us to the top layer. But we may need to descend
+        * to a lower layer. For non-union mounts the layer
+        * is always zero.
+        */
+       for (i = 0; i < (int) state->ss_union_layer; i++) {
+               if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
+                       break;
+               tvp = vp;
+               vp = vp->v_mount->mnt_vnodecovered;
+               if (vp == NULL) {
+                       vnode_put(tvp);
+                       error = ENOENT;
+                       goto freeandexit;
+               }
+               vnode_getwithref(vp);
+               vnode_put(tvp);
         }
  
  #if CONFIG_MACF
         }
  
  #if CONFIG_MACF
-       error = mac_vnode_check_revoke(ctx, vp);
-       if (error)
-               goto out;
+       error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
+       if (error) {
+               vnode_put(vp);
+               goto freeandexit;
+       }
  #endif
  
  #endif
  
-       VATTR_INIT(&va);
-       VATTR_WANTED(&va, va_uid);
-       if ((error = vnode_getattr(vp, &va, ctx)))
-               goto out;
-       if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
-           (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
-               goto out;
-       if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
-               VNOP_REVOKE(vp, REVOKEALL, ctx);
-out:
+        
+       /*
+        * If searchblock.maxmatches == 0, then skip the search. This has happened 
+        * before and sometimes the underlying code doesnt deal with it well.
+        */
+        if (searchblock.maxmatches == 0) {
+               nummatches = 0;
+               goto saveandexit;
+        }
+
+       /*
+        * Allright, we have everything we need, so lets make that call.
+        * 
+        * We keep special track of the return value from the file system:
+        * EAGAIN is an acceptable error condition that shouldn't keep us
+        * from copying out any results...
+        */
+
+       fserror = VNOP_SEARCHFS(vp,
+               searchparams1,
+               searchparams2,
+               &searchblock.searchattrs,
+               (u_long)searchblock.maxmatches,
+               &timelimit,
+               returnattrs,
+               &nummatches,
+               (u_long)uap->scriptcode,
+               (u_long)uap->options,
+               auio,
+               (struct searchstate *) &state->ss_fsstate,
+               ctx);
+               
+       /*
+        * If it's a union mount we need to be called again
+        * to search the mounted-on filesystem.
+        */
+       if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
+               state->ss_union_flags = SRCHFS_START;
+               state->ss_union_layer++;        // search next layer down
+               fserror = EAGAIN;
+       }
+
+saveandexit:
+
         vnode_put(vp);
         vnode_put(vp);
-       return (error);
-}
  
  
+       /* Now copy out the stuff that needs copying out. That means the number of matches, the
+          search state.  Everything was already put into the return buffer by the vop call. */
+
+       if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
+               goto freeandexit;
  
  
-/*
- *  HFS/HFS PlUS SPECIFIC SYSTEM CALLS
- *  The following system calls are designed to support features
- *  which are specific to the HFS & HFS Plus volume formats
- */
+       if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
+               goto freeandexit;
+       
+       error = fserror;
  
  
-#ifdef __APPLE_API_OBSOLETE
+freeandexit:
  
  
-/************************************************/
-/* *** Following calls will be deleted soon *** */
-/************************************************/
+       FREE(searchparams1,M_TEMP);
+
+       return(error);
+
+
+} /* end of searchfs system call */
+
+#else /* CONFIG_SEARCHFS */
  
  
-/*
- * Make a complex file.  A complex file is one with multiple forks (data streams)
- */
-/* ARGSUSED */
  int
  int
-mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused int32_t *retval)
+searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)     /* stub built when CONFIG_SEARCHFS is off: ignores its arguments and always fails with ENOTSUP */
  {
         return (ENOTSUP);
  }
  
  {
         return (ENOTSUP);
  }
  
-/*
- * Extended stat call which returns volumeid and vnodeid as well as other info
- */
-/* ARGSUSED */
-int
-statv(__unused proc_t p,
-         __unused struct statv_args *uap,
-         __unused int32_t *retval)
-{
-       return (ENOTSUP);       /*  We'll just return an error for now */
+#endif /* CONFIG_SEARCHFS */
  
  
-} /* end of statv system call */
  
  
-/*
-* Extended lstat call which returns volumeid and vnodeid as well as other info
-*/
-/* ARGSUSED */
-int
-lstatv(__unused proc_t p,
-          __unused struct lstatv_args *uap,
-          __unused int32_t *retval)
-{
-       return (ENOTSUP);       /*  We'll just return an error for now */
-} /* end of lstatv system call */
+lck_grp_attr_t *  nspace_group_attr;
+lck_attr_t *      nspace_lock_attr;
+lck_grp_t *       nspace_mutex_group;
  
  
-/*
-* Extended fstat call which returns volumeid and vnodeid as well as other info
-*/
-/* ARGSUSED */
-int
-fstatv(__unused proc_t p, 
-          __unused struct fstatv_args *uap, 
-          __unused int32_t *retval)
+lck_mtx_t         nspace_handler_lock;              // taken around scans and updates of nspace_items[]
+lck_mtx_t         nspace_handler_exclusion_lock;    // taken around the handler_busy test-and-set in wait_for_namespace_event
+
+time_t snapshot_timestamp=0;          // reset to 0 by nspace_proc_exit when the snapshot handler goes away
+int nspace_allow_virtual_devs=0;      // nonzero: do not skip snapshot events on MNTK_VIRTUALDEV mounts
+
+void nspace_handler_init(void);
+
+typedef struct nspace_item_info {
+       struct vnode *vp;         // vnode the event is about; &vp is also the per-slot sleep/wakeup channel
+       void         *arg;        // caller-supplied argument (NULL when NSPACE_REARM_NO_ARG was passed)
+       uint64_t      op;         // operation word passed to resolve_nspace_item_ext
+       uint32_t      vid;        // vnode_vid(vp) captured when the slot was filled
+       uint32_t      flags;      // NSPACE_ITEM_* bits; 0 means the slot is free
+       uint32_t      token;      // set from ++nspace_token_id when a handler picks the item up
+       uint32_t      refcount;   // number of resolve_nspace_item_ext callers waiting on this slot
+} nspace_item_info;
+
+#define MAX_NSPACE_ITEMS   128         // capacity of the fixed-size nspace_items[] table
+nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
+uint32_t      nspace_item_idx=0;              // also used as the sleep/wakeup rendezvous address
+uint32_t      nspace_token_id=0;              // last token handed out; &nspace_token_id is the channel callers sleep on when the table is full
+uint32_t      nspace_handler_timeout = 15;    // seconds
+
+#define NSPACE_ITEM_NEW         0x0001
+#define NSPACE_ITEM_PROCESSING  0x0002
+#define NSPACE_ITEM_DEAD        0x0004
+#define NSPACE_ITEM_CANCELLED   0x0008
+#define NSPACE_ITEM_DONE        0x0010
+#define NSPACE_ITEM_RESET_TIMER 0x0020
+
+#define NSPACE_ITEM_NSPACE_EVENT   0x0040
+#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
+
+#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
+
+//#pragma optimization_level 0
+
+typedef enum {
+       NSPACE_HANDLER_NSPACE = 0,
+       NSPACE_HANDLER_SNAPSHOT = 1,
+
+       NSPACE_HANDLER_COUNT,           // number of handler types; sizes nspace_handlers[]
+} nspace_type_t;
+
+typedef struct {
+       uint64_t handler_tid;           // thread_tid() of the thread that registered as handler
+       struct proc *handler_proc;      // registered handler process, NULL when none
+       int handler_busy;               // set to 1 on entry to wait_for_namespace_event; a second caller gets EBUSY
+} nspace_handler_t;
+
+nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
+
+/* namespace fsctl functions */
+static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
+static int nspace_item_flags_for_type(nspace_type_t nspace_type);
+static int nspace_open_flags_for_type(nspace_type_t nspace_type);
+static nspace_type_t nspace_type_for_op(uint64_t op);
+static int nspace_is_special_process(struct proc *proc);
+static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
+static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
+static int validate_namespace_args (int is64bit, int size);
+static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
+
+/*
+ * Report whether an item's flags mark it as an event for the given handler
+ * type.  Only the two event-type bits (NSPACE_ITEM_ALL_EVENT_TYPES) are
+ * examined, and exactly the bit belonging to nspace_type must be set.
+ * An unrecognized handler type is logged and treated as "no match" (0).
+ */
+static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
+{
+       switch(nspace_type) {
+               case NSPACE_HANDLER_NSPACE:
+                       return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
+               case NSPACE_HANDLER_SNAPSHOT:
+                       return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
+               default:
+                       printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
+                       return 0;
+       }
+}
+/*
+ * Map a handler type to the NSPACE_ITEM_*_EVENT flag bit that tags items
+ * destined for that handler.  An unrecognized type is logged and yields 0.
+ */
+static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
  {
  {
-       return (ENOTSUP);       /*  We'll just return an error for now */
-} /* end of fstatv system call */
+       switch(nspace_type) {
+               case NSPACE_HANDLER_NSPACE:
+                       return NSPACE_ITEM_NSPACE_EVENT;
+               case NSPACE_HANDLER_SNAPSHOT:
+                       return NSPACE_ITEM_SNAPSHOT_EVENT;
+               default:
+                       printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
+                       return 0;
+       }
+}
  
  
+static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
+{      /*
+        * Open mode used when an item's vnode is opened on behalf of a handler:
+        * read/write for the namespace handler, read-only for the snapshot
+        * handler, O_EVTONLY in both cases.  An unrecognized type is logged
+        * and yields 0.
+        */
+       switch(nspace_type) {
+               case NSPACE_HANDLER_NSPACE:
+                       return FREAD | FWRITE | O_EVTONLY;
+               case NSPACE_HANDLER_SNAPSHOT:
+                       return FREAD | O_EVTONLY;
+               default:
+                       printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
+                       return 0;
+       }
+}
  
  
-/************************************************/
-/* *** Preceding calls will be deleted soon *** */
-/************************************************/
+static inline nspace_type_t nspace_type_for_op(uint64_t op)
+{      /*
+        * Decode the event-type bits of op (NAMESPACE_HANDLER_EVENT_TYPE_MASK)
+        * into a handler type.  An unrecognized value is logged and falls back
+        * to NSPACE_HANDLER_NSPACE rather than reporting an error.
+        */
+       switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
+               case NAMESPACE_HANDLER_NSPACE_EVENT:
+                       return NSPACE_HANDLER_NSPACE;
+               case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
+                       return NSPACE_HANDLER_SNAPSHOT;
+               default:
+                       printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
+                       return NSPACE_HANDLER_NSPACE;
+       }
+}
  
  
-#endif /* __APPLE_API_OBSOLETE */
+static inline int nspace_is_special_process(struct proc *proc)
+{      /*
+        * Returns 1 when proc is currently recorded as the handler_proc of any
+        * handler type in nspace_handlers[], 0 otherwise.
+        */
+       int i;
+       for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
+               if (proc == nspace_handlers[i].handler_proc)
+                       return 1;
+       }
+       return 0;
+}
  
  
-/*
-* Obtain attribute information on objects in a directory while enumerating
-* the directory.  This call does not yet support union mounted directories.
-* TO DO
-*  1.union mounted directories.
-*/
+void
+nspace_handler_init(void)
+{      /*
+        * One-time setup: allocate the lock attribute and lock group, initialize
+        * both namespace-handler mutexes, and zero the nspace_items[] table so
+        * every slot starts out free (flags == 0).
+        */
+       nspace_lock_attr    = lck_attr_alloc_init();
+       nspace_group_attr   = lck_grp_attr_alloc_init();
+       nspace_mutex_group  = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
+       lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
+       lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
+       memset(&nspace_items[0], 0, sizeof(nspace_items));
+}
  
  
-/* ARGSUSED */
-int
-getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
+void
+nspace_proc_exit(struct proc *p)      /*
+        * If p is registered as a namespace and/or snapshot handler, unregister
+        * it and release every waiter whose item was destined for it: matching
+        * items that are still NEW or PROCESSING are marked DONE and their
+        * sleepers are woken.  Does nothing when p is not a handler.
+        */
  {
  {
-       vnode_t vp;
-       struct fileproc *fp;
-       uio_t auio = NULL;
-       int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
-       uint32_t count;
-       uint32_t newstate;
-       int error, eofflag;
-       uint32_t loff;
-       struct attrlist attributelist; 
-       vfs_context_t ctx = vfs_context_current();
-       int fd = uap->fd;
-       char uio_buf[ UIO_SIZEOF(1) ];
-       kauth_action_t action;
+       int i, event_mask = 0;
+       
+       for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
+               if (p == nspace_handlers[i].handler_proc) {
+                       event_mask |= nspace_item_flags_for_type(i);    // remember which event types lost their handler
+                       nspace_handlers[i].handler_tid = 0;
+                       nspace_handlers[i].handler_proc = NULL;
+               }
+       }
  
  
-       AUDIT_ARG(fd, fd);
-    
-       /* Get the attributes into kernel space */
-       if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
-               return(error);
+       if (event_mask == 0) {
+               return;
         }
         }
-       if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
-               return(error);
+       
+       if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
+               // if this process was the snapshot handler, zero snapshot_timestamp
+               snapshot_timestamp = 0;
         }
         }
-       if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
-               return (error);
+       
+       //
+       // unblock anyone that's waiting for the handler that died
+       //
+       lck_mtx_lock(&nspace_handler_lock);
+       for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+               if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
+
+                       if ( nspace_items[i].flags & event_mask ) {
+
+                               if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
+                                       vnode_lock_spin(nspace_items[i].vp);
+                                       nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
+                                       vnode_unlock(nspace_items[i].vp);
+                               }
+                               nspace_items[i].vp = NULL;
+                               nspace_items[i].vid = 0;
+                               nspace_items[i].flags = NSPACE_ITEM_DONE;      // waiters in resolve_nspace_item_ext treat DONE as success (0)
+                               nspace_items[i].token = 0;
+                               
+                               wakeup((caddr_t)&(nspace_items[i].vp));
+                       }
+               }
         }
         }
-       if ((fp->f_fglob->fg_flag & FREAD) == 0) {
-               AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
-               error = EBADF;
-               goto out;
+       
+       wakeup((caddr_t)&nspace_item_idx);
+       lck_mtx_unlock(&nspace_handler_lock);
+}
+
+/*
+ * Convenience wrapper: resolve_nspace_item_ext() with a NULL arg.
+ */
+int 
+resolve_nspace_item(struct vnode *vp, uint64_t op)
+{
+       return resolve_nspace_item_ext(vp, op, NULL);
+}
+/*
+ * Queue a namespace or snapshot event for vp in nspace_items[] and sleep
+ * until a handler resolves it.
+ *
+ * Returns 0 without queueing when vp is not a regular file, directory or
+ * symlink, when op is a snapshot event on a MNTK_VIRTUALDEV mount (unless
+ * nspace_allow_virtual_devs is set), or when no handler is registered for
+ * the event type.  Returns EDEADLK when the caller is itself a handler
+ * process.  If the table is full and the wait for a free slot fails, that
+ * msleep() error is returned.  Otherwise the result is 0 once the item is
+ * flagged NSPACE_ITEM_DONE, the item's token value if it is flagged
+ * NSPACE_ITEM_CANCELLED, ETIMEDOUT when the wait expires without a timer
+ * reset, or else whatever msleep() returned.
+ *
+ * arg is stored in the slot for the handler, except that
+ * NSPACE_REARM_NO_ARG is stored as NULL.
+ */
+int 
+resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
+{
+       int i, error, keep_waiting;
+       struct timespec ts;
+       nspace_type_t nspace_type = nspace_type_for_op(op);
+
+       // only allow namespace events on regular files, directories and symlinks.
+       if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
+               return 0;
         }
  
         }
  
+       //
+       // if this is a snapshot event and the vnode is on a
+       // disk image just pretend nothing happened since any
+       // change to the disk image will cause the disk image
+       // itself to get backed up and this avoids multi-way
+       // deadlocks between the snapshot handler and the ever
+       // popular diskimages-helper process.  the variable
+       // nspace_allow_virtual_devs allows this behavior to
+       // be overridden (for use by the Mobile TimeMachine
+       // testing infrastructure which uses disk images)
+       //
+       if (   (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
+           && (vp->v_mount != NULL)
+           && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
+           && !nspace_allow_virtual_devs) {
  
  
-#if CONFIG_MACF
-       error = mac_file_check_change_offset(vfs_context_ucred(ctx),
-           fp->f_fglob);
-       if (error)
-               goto out;
-#endif
+               return 0;
+       }
  
  
+       // if (thread_tid(current_thread()) == namespace_handler_tid) {
+       if (nspace_handlers[nspace_type].handler_proc == NULL) {        // no handler registered for this event type: nothing to wait for
+               return 0;
+       }
  
  
-       if ( (error = vnode_getwithref(vp)) )
-               goto out;
+       if (nspace_is_special_process(current_proc())) {        // the caller is itself a registered handler process
+               return EDEADLK;
+       }
  
  
-       AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+       lck_mtx_lock(&nspace_handler_lock);
  
  
-       if (vp->v_type != VDIR) {
-               (void)vnode_put(vp);
-               error = EINVAL;
-               goto out;
+retry:
+       for(i=0; i < MAX_NSPACE_ITEMS; i++) {   // first look for an item already queued for this (vp, op)
+               if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
+                       break;
+               }
+       }
+
+       if (i >= MAX_NSPACE_ITEMS) {
+               for(i=0; i < MAX_NSPACE_ITEMS; i++) {   // none queued: look for a free slot instead
+                       if (nspace_items[i].flags == 0) {
+                               break;
+                       }
+               }
+       } else {
+               nspace_items[i].refcount++;     // join the existing item as an additional waiter
         }
         }
+       
+       if (i >= MAX_NSPACE_ITEMS) {    // table is full: wait for a slot to be released
+               ts.tv_sec = nspace_handler_timeout;
+               ts.tv_nsec = 0;
  
  
-#if CONFIG_MACF
-       error = mac_vnode_check_readdir(ctx, vp);
-       if (error != 0) {
-               (void)vnode_put(vp);
-               goto out;
+               error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
+               if (error == 0) {
+                       // an entry got free'd up, go see if we can get a slot
+                       goto retry;
+               } else {
+                       lck_mtx_unlock(&nspace_handler_lock);
+                       return error;
+               }
         }
         }
-#endif /* MAC */
  
  
-       /* set up the uio structure which will contain the users return buffer */
-       loff = fp->f_fglob->fg_offset;
-       auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, 
-           &uio_buf[0], sizeof(uio_buf));
-       uio_addiov(auio, uap->buffer, uap->buffersize);
-       
-       /*
-        * If the only item requested is file names, we can let that past with
-        * just LIST_DIRECTORY.  If they want any other attributes, that means
-        * they need SEARCH as well.
-        */
-       action = KAUTH_VNODE_LIST_DIRECTORY;
-       if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
-           attributelist.fileattr || attributelist.dirattr)
-               action |= KAUTH_VNODE_SEARCH;
-       
-       if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
+       //
+       // if it didn't already exist, add it.  if it did exist
+       // we'll get woken up when someone does a wakeup() on
+       // the slot in the nspace_items table.
+       //
+       if (vp != nspace_items[i].vp) {
+               nspace_items[i].vp = vp;
+               nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg;  // arg is {NULL, true, uio *} - only pass uio thru to the user
+               nspace_items[i].op = op;
+               nspace_items[i].vid = vnode_vid(vp);
+               nspace_items[i].flags = NSPACE_ITEM_NEW;
+               nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
+               if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
+                       if (arg) {
+                               vnode_lock_spin(vp);
+                               vp->v_flag |= VNEEDSSNAPSHOT;
+                               vnode_unlock(vp);
+                       }
+               }
  
  
-               /* Believe it or not, uap->options only has 32-bits of valid
-                * info, so truncate before extending again */
-               error = VNOP_READDIRATTR(vp, &attributelist, auio,
-                                        count,
-                                        (u_long)(uint32_t)uap->options, &newstate, &eofflag,
-                                        &count, ctx);
+               nspace_items[i].token = 0;
+               nspace_items[i].refcount = 1;
+               
+               wakeup((caddr_t)&nspace_item_idx);      // announce the new item on the rendezvous address (presumably where the handler sleeps; the sleeper is not visible here)
         }
         }
-       (void)vnode_put(vp);
  
  
-       if (error) 
-               goto out;
-       fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
+       //
+       // Now go to sleep until the handler does a wakeup on this
+       // slot in the nspace_items table (or we timeout).
+       //
+       keep_waiting = 1;
+       while(keep_waiting) {
+               ts.tv_sec = nspace_handler_timeout;
+               ts.tv_nsec = 0;
+               error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
  
  
-       if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
-               goto out;
-       if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
-               goto out;
-       if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
-               goto out;
+               if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
+                       error = 0;
+               } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
+                       error = nspace_items[i].token;  // for a cancelled item the token field is returned as the error value
+               } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
+                       if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
+                               nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
+                               continue;       // timer was reset: consume the flag and sleep for another full timeout
+                       } else {
+                               error = ETIMEDOUT;
+                       }
+               } else if (error == 0) {
+                       // hmmm, why did we get woken up?
+                       printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
+                              nspace_items[i].token);
+               } 
  
  
-       *retval = eofflag;  /* similar to getdirentries */
-       error = 0;
-out:
-       file_drop(fd);
-       return (error); /* return error earlier, an retval of 0 or 1 now */
+               if (--nspace_items[i].refcount == 0) {
+                       nspace_items[i].vp = NULL;     // clear this so that no one will match on it again
+                       nspace_items[i].arg = NULL;
+                       nspace_items[i].token = 0;     // clear this so that the handler will not find it anymore
+                       nspace_items[i].flags = 0;     // this clears it for re-use
+               }
+               wakeup(&nspace_token_id);       // let callers blocked in the "nspace-no-space" sleep re-scan for a slot
+               keep_waiting = 0;
+       }
  
  
-} /* end of getdirentryattr system call */
+       lck_mtx_unlock(&nspace_handler_lock);
+
+       return error;
+}
  
  
-/*
-* Exchange data between two files
-*/
  
  
-/* ARGSUSED */
  int
  int
-exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
+get_nspace_item_status(struct vnode *vp, int32_t *status)     /* copies the flags of the first nspace_items[] slot whose vp matches into *status and returns 0; returns ENOENT (leaving *status untouched) when no slot matches */
  {
  {
+       int i;
  
  
-       struct nameidata fnd, snd;
-       vfs_context_t ctx = vfs_context_current();
-       vnode_t fvp;
-       vnode_t svp;
-       int error;
-       u_int32_t nameiflags;
-       char *fpath = NULL;
-       char *spath = NULL;
-       int   flen=0, slen=0;
-       int from_truncated=0, to_truncated=0;
-#if CONFIG_FSE
-       fse_info f_finfo, s_finfo;
-#endif
-       
-       nameiflags = 0;
-       if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
+       lck_mtx_lock(&nspace_handler_lock);
+       for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+               if (nspace_items[i].vp == vp) {
+                       break;
+               }
+       }
  
  
-    NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path1, ctx);
+       if (i >= MAX_NSPACE_ITEMS) {
+               lck_mtx_unlock(&nspace_handler_lock);
+               return ENOENT;
+       }
  
  
-    error = namei(&fnd);
-    if (error)
-        goto out2;
+       *status = nspace_items[i].flags;
+       lck_mtx_unlock(&nspace_handler_lock);
+       return 0;
+}
+       
  
  
-       nameidone(&fnd);
-       fvp = fnd.ni_vp;
+#if 0  /* compiled out: build_volfs_path() formats a "/.vol/<fsid>/<fileid>" path for vp, or a placeholder path (returning -1) if vnode_getattr fails */
+static int
+build_volfs_path(struct vnode *vp, char *path, int *len)
+{
+       struct vnode_attr va;
+       int ret;
  
  
-    NDINIT(&snd, LOOKUP | CN_NBMOUNTLOOK, nameiflags | AUDITVNPATH2, 
-               UIO_USERSPACE, uap->path2, ctx);
+       VATTR_INIT(&va);
+       VATTR_WANTED(&va, va_fsid);
+       VATTR_WANTED(&va, va_fileid);
  
  
-    error = namei(&snd);
-    if (error) {
-               vnode_put(fvp);
-               goto out2;
-    }
-       nameidone(&snd);
-       svp = snd.ni_vp;
+       if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
+               *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
+               ret = -1;
+       } else {
+               *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;     /* *len is in: buffer size, out: snprintf result + 1 */
+               ret = 0;
+       }
  
  
-       /*
-        * if the files are the same, return an inval error
-        */
-       if (svp == fvp) {
-               error = EINVAL;
-               goto out;
-       } 
+       return ret;
+}
+#endif
  
  
-       /*
-        * if the files are on different volumes, return an error
-        */
-       if (svp->v_mount != fvp->v_mount) {
-               error = EXDEV;
-               goto out;
+//
+// Note: this function does NOT check permissions on all of the
+// parent directories leading to this vnode.  It should only be
+// called on behalf of a root process.  Otherwise a process may
+// get access to a file because the file itself is readable even
+// though its parent directories would prevent access.
+// Steps: superuser check, MAC open check, vnode_authorize for the requested mode, VNOP_OPEN, vnode_ref_ext, open notifications.  Returns 0 or the first error hit.
+static int
+vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
+{
+       int error, action;
+
+       if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {   // enforce the root-only rule from the note above
+               return error;
         }
  
  #if CONFIG_MACF
         }
  
  #if CONFIG_MACF
-       error = mac_vnode_check_exchangedata(ctx,
-           fvp, svp);
+       error = mac_vnode_check_open(ctx, vp, fmode);
         if (error)
         if (error)
-               goto out;
+               return error;
  #endif
  #endif
-       if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
-           ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
-               goto out;
  
  
-       if (
-#if CONFIG_FSE
-       need_fsevent(FSE_EXCHANGE, fvp) || 
-#endif
-       kauth_authorize_fileop_has_listeners()) {
-               GET_PATH(fpath);
-               GET_PATH(spath);
-               if (fpath == NULL || spath == NULL) {
-                       error = ENOMEM;
-                       goto out;
+       /* compute action to be authorized */
+       action = 0;
+       if (fmode & FREAD) {
+               action |= KAUTH_VNODE_READ_DATA;
+       }
+       if (fmode & (FWRITE | O_TRUNC)) {
+               /*
+                * If we are writing, appending, and not truncating,
+                * indicate that we are appending so that if the
+                * UF_APPEND or SF_APPEND bits are set, we do not deny
+                * the open.
+                */
+               if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
+                       action |= KAUTH_VNODE_APPEND_DATA;
+               } else {
+                       action |= KAUTH_VNODE_WRITE_DATA;
                 }
                 }
+       }
  
  
-               flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
-               slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
+       if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
+               return error;
                 
                 
-#if CONFIG_FSE
-               get_fse_info(fvp, &f_finfo, ctx);
-               get_fse_info(svp, &s_finfo, ctx);
-               if (from_truncated || to_truncated) {
-                       // set it here since only the f_finfo gets reported up to user space
-                       f_finfo.mode |= FSE_TRUNCATED_PATH;
-               }
+
+       //
+       // if the vnode is tagged VOPENEVT and the current process
+       // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
+       // flag to the open mode so that this open won't count against
+       // the vnode when carbon delete() does a vnode_isinuse() to see
+       // if a file is currently in use.  this allows spotlight
+       // importers to not interfere with carbon apps that depend on
+       // the no-delete-if-busy semantics of carbon delete().
+       //
+       if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
+               fmode |= O_EVTONLY;
+       }
+
+       if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
+               return error;
+       }
+       if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
+               VNOP_CLOSE(vp, fmode, ctx);     // undo the open when the reference cannot be taken
+               return error;
+       }
+
+       /* Call out to allow 3rd party notification of open. 
+        * Ignore result of kauth_authorize_fileop call.
+        */
+#if CONFIG_MACF
+       mac_vnode_notify_open(ctx, vp, fmode);
  #endif
  #endif
+       kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, 
+                              (uintptr_t)vp, 0);
+
+
+       return 0;
+}
+
+static int
+wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
+{
+       int i, error=0, unblock=0;
+       task_t curtask;
+       
+       lck_mtx_lock(&nspace_handler_exclusion_lock);
+       if (nspace_handlers[nspace_type].handler_busy) {
+               lck_mtx_unlock(&nspace_handler_exclusion_lock);
+               return EBUSY;
         }
         }
-       /* Ok, make the call */
-       error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
+       nspace_handlers[nspace_type].handler_busy = 1;
+       lck_mtx_unlock(&nspace_handler_exclusion_lock);
+       
+       /* 
+        * Any process that gets here will be one of the namespace handlers.
+        * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
+        * as we can cause deadlocks to occur, because the namespace handler may prevent
+        * VNOP_INACTIVE from proceeding.  Mark the current task as a P_DEPENDENCY_CAPABLE 
+        * process.
+        */
+       curtask = current_task();
+       bsd_set_dependency_capable (curtask);   
+       
+       lck_mtx_lock(&nspace_handler_lock);
+       if (nspace_handlers[nspace_type].handler_proc == NULL) {
+               nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
+               nspace_handlers[nspace_type].handler_proc = current_proc();
+       }
+       
+       while (error == 0) {
+               
+               for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+                       if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
+                               if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
+                                       continue;
+                               }
+                               break;
+                       }
+               }
+               
+               if (i < MAX_NSPACE_ITEMS) {
+                       nspace_items[i].flags  &= ~NSPACE_ITEM_NEW;
+                       nspace_items[i].flags  |= NSPACE_ITEM_PROCESSING;
+                       nspace_items[i].token  = ++nspace_token_id;
+                       
+                       if (nspace_items[i].vp) {
+                               struct fileproc *fp;
+                               int32_t indx, fmode;
+                               struct proc *p = current_proc();
+                               vfs_context_t ctx = vfs_context_current();
+                               struct vnode_attr va;
  
  
-       if (error == 0) {
-           const char *tmpname;
  
  
-           if (fpath != NULL && spath != NULL) {
-                   /* call out to allow 3rd party notification of exchangedata. 
-                    * Ignore result of kauth_authorize_fileop call.
-                    */
-                   kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE, 
-                                          (uintptr_t)fpath, (uintptr_t)spath);
-           }
-           name_cache_lock();
+                               /* 
+                                * Use vnode pointer to acquire a file descriptor for
+                                * hand-off to userland
+                                */
+                               fmode = nspace_open_flags_for_type(nspace_type);
+                               error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
+                               if (error) {
+                                       unblock = 1;
+                                       break;
+                               }
+                               error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
+                               if (error) {
+                                       unblock = 1;
+                                       vnode_put(nspace_items[i].vp);
+                                       break;
+                               }
+                               
+                               if ((error = falloc(p, &fp, &indx, ctx))) {
+                                       vn_close(nspace_items[i].vp, fmode, ctx);
+                                       vnode_put(nspace_items[i].vp);
+                                       unblock = 1;
+                                       break;
+                               }
+                               
+                               fp->f_fglob->fg_flag = fmode;
+                               fp->f_fglob->fg_ops = &vnops;
+                               fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
+                               
+                               proc_fdlock(p);
+                               procfdtbl_releasefd(p, indx, NULL);
+                               fp_drop(p, indx, fp, 1);
+                               proc_fdunlock(p);       
+
+                               /* 
+                                * All variants of the namespace handler struct support these three fields:
+                                * token, flags, and the FD pointer
+                                */
+                               error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
+                               error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
+                               error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
+
+                               /*
+                                * Handle optional fields:
+                                *
+                                * - the extended version supports an info ptr (offset, length)
+                                * - the namedata version supports a unique per-link object ID
+                                *
+                                */
+                               if (nhd->infoptr) {
+                                       uio_t uio = (uio_t)nspace_items[i].arg;
+                                       uint64_t u_offset, u_length;
+                                       
+                                       if (uio) {
+                                               u_offset = uio_offset(uio);
+                                               u_length = uio_resid(uio);
+                                       } else {
+                                               u_offset = 0;
+                                               u_length = 0;
+                                       }                                               
+                                       error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
+                                       error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
+                               }
  
  
-           tmpname     = fvp->v_name;
-           fvp->v_name = svp->v_name;
-           svp->v_name = tmpname;
-           
-           if (fvp->v_parent != svp->v_parent) {
-               vnode_t tmp;
+                               if (nhd->objid) {       
+                                       VATTR_INIT(&va);
+                                       VATTR_WANTED(&va, va_linkid);
+                                       error = vnode_getattr(nspace_items[i].vp, &va, ctx);
+                                       if (error == 0 ) {
+                                               uint64_t linkid = 0;
+                                               if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
+                                                       linkid = (uint64_t)va.va_linkid;
+                                               }
+                                               error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
+                                       }
+                               }
  
  
-               tmp           = fvp->v_parent;
-               fvp->v_parent = svp->v_parent;
-               svp->v_parent = tmp;
-           }
-           name_cache_unlock();
+                               if (error) {
+                                       vn_close(nspace_items[i].vp, fmode, ctx);
+                                       fp_free(p, indx, fp);
+                                       unblock = 1;
+                               }
+                               
+                               vnode_put(nspace_items[i].vp);
+                               
+                               break;
+                       } else {
+                               printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
+                                      i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
+                       }
+                       
+               } else {
+                       error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
+                       if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
+                               error = EINVAL;
+                               break;
+                       }
+                       
+               }
+       }
+       
+       if (unblock) {
+               if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
+                       vnode_lock_spin(nspace_items[i].vp);
+                       nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
+                       vnode_unlock(nspace_items[i].vp);
+               }
+               nspace_items[i].vp = NULL;
+               nspace_items[i].vid = 0;
+               nspace_items[i].flags = NSPACE_ITEM_DONE;
+               nspace_items[i].token = 0;
+               
+               wakeup((caddr_t)&(nspace_items[i].vp));
+       }
+       
+       if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
+               // just go through every snapshot event and unblock it immediately.
+               if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
+                       for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+                               if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
+                                       if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
+                                               nspace_items[i].vp = NULL;
+                                               nspace_items[i].vid = 0;
+                                               nspace_items[i].flags = NSPACE_ITEM_DONE;
+                                               nspace_items[i].token = 0;
+                                               
+                                               wakeup((caddr_t)&(nspace_items[i].vp));                                 
+                                       }
+                               }
+                       }
+               }
+       }
+       
+       lck_mtx_unlock(&nspace_handler_lock);
+       
+       lck_mtx_lock(&nspace_handler_exclusion_lock);
+       nspace_handlers[nspace_type].handler_busy = 0;
+       lck_mtx_unlock(&nspace_handler_exclusion_lock);
+       
+       return error;
+}
  
  
-#if CONFIG_FSE
-           if (fpath != NULL && spath != NULL) {
-                   add_fsevent(FSE_EXCHANGE, ctx,
-                               FSE_ARG_STRING, flen, fpath,
-                               FSE_ARG_FINFO, &f_finfo,
-                               FSE_ARG_STRING, slen, spath,
-                               FSE_ARG_FINFO, &s_finfo,
-                               FSE_ARG_DONE);
-           }
-#endif
+static inline int validate_namespace_args (int is64bit, int size) {
+
+       if (is64bit) {
+               /*
+                * validate_namespace_args: accept `size' only when it exactly
+                * equals the size of one of the known namespace-handler
+                * argument structures for the caller's ABI; returns 0 on a
+                * match and EINVAL otherwise.
+                *
+                * 64 bit -- must be one of the three user64 variants tested
+                * below; any other size falls through to the EINVAL return.
+                */
+               if (size == sizeof(user64_namespace_handler_info)) {
+                       goto sizeok;
+               }
+               if (size == sizeof(user64_namespace_handler_info_ext)) {
+                       goto sizeok;
+               }
+               if (size == sizeof(user64_namespace_handler_data)) {
+                       goto sizeok;
+               }
+               return EINVAL;
+       }
+       else {
+               /*
+                * 32 bit -- must be one of the three user32 variants tested
+                * below; any other size falls through to the EINVAL return.
+                */
+               if (size == sizeof(user32_namespace_handler_info)) {
+                       goto sizeok;
+               }
+               if (size == sizeof(user32_namespace_handler_info_ext)) {
+                       goto sizeok;
+               }
+               if (size == sizeof(user32_namespace_handler_data)) {
+                       goto sizeok;
+               }
+               return EINVAL;
         }
  
         }
  
-out:
-       if (fpath != NULL)
-               RELEASE_PATH(fpath);
-       if (spath != NULL)
-               RELEASE_PATH(spath);
-       vnode_put(svp);
-       vnode_put(fvp);
-out2:
-        return (error);
+sizeok:        /* reached only via the gotos above, i.e. size matched a known structure */
+
+       return 0;
+
  }
  
  }
  
+static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
+{
+       int error = 0;
+       namespace_handler_data nhd;
+       
+       bzero (&nhd, sizeof(namespace_handler_data));   /* optional fields stay zero unless filled in below */
  
  
-/* ARGSUSED */
+       if (nspace_type == NSPACE_HANDLER_SNAPSHOT && 
+                       (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
+               return EINVAL;
+       }
+       
+       if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+               return error;
+       }
+       
+       error = validate_namespace_args (is64bit, size);
+       if (error) {
+               return error;
+       }
+       
+       /*
+        * At this point the request has passed every gate above: snapshot
+        * handlers are refused with EINVAL while snapshot_timestamp is 0 or ~0,
+        * suser() must return 0 for the calling credential, and size must be
+        * accepted by validate_namespace_args().
+        *
+        * Copy in the userland pointers into our kernel-only struct.  The
+        * values read out of `data' (token, flags, fdptr and, for the larger
+        * variants, infoptr and objid) are stored in nhd as user_addr_t and
+        * handed to wait_for_namespace_event(); nothing is dereferenced here.
+        */
+
+       if (is64bit) {
+               /* 64 bit userland structures */
+               nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
+               nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
+               nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
+
+               /* If the size is greater than the standard info struct, add in extra fields */
+               if (size > (sizeof(user64_namespace_handler_info))) {
+                       if (size >= (sizeof(user64_namespace_handler_info_ext))) {
+                               nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
+                       }
+                       if (size == (sizeof(user64_namespace_handler_data))) {
+                               nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
+                       }
+                       /* Otherwise the fields were pre-zeroed when we did the bzero above. */
+               }
+       } 
+       else {
+               /*
+                * 32 bit userland structures.
+                * NOTE(review): token/flags/fdptr/infoptr are widened with
+                * CAST_USER_ADDR_T, but objid below uses a plain (user_addr_t)
+                * cast -- confirm the two are equivalent for this field's type.
+                */
+               nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
+               nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
+               nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
+               
+               if (size > (sizeof(user32_namespace_handler_info))) {
+                       if (size >= (sizeof(user32_namespace_handler_info_ext))) {
+                               nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
+                       }
+                       if (size == (sizeof(user32_namespace_handler_data))) {
+                               nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
+                       }
+                       /* Otherwise the fields were pre-zeroed when we did the bzero above. */
+               }
+       }
+       
+       return wait_for_namespace_event(&nhd, nspace_type);
+}
  
  
-int
-searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
+/*
+ * Make a filesystem-specific control call:
+ */
+/* ARGSUSED */
+static int
+fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
  {
  {
-       vnode_t vp;
         int error=0;
         int error=0;
-       int fserror = 0;
-       struct nameidata nd;
-       struct user64_fssearchblock searchblock;
-       struct searchstate *state;
-       struct attrlist *returnattrs;
-       struct timeval timelimit;
-       void *searchparams1,*searchparams2;
-       uio_t auio = NULL;
-       int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
-       uint32_t nummatches;
-       int mallocsize;
-       uint32_t nameiflags;
-       vfs_context_t ctx = vfs_context_current();
-       char uio_buf[ UIO_SIZEOF(1) ];
+       boolean_t is64bit;
+       u_int size;
+#define STK_PARAMS 128
+       char stkbuf[STK_PARAMS];
+       caddr_t data, memp;
+       vnode_t vp = *arg_vp;
  
  
-       /* Start by copying in fsearchblock paramater list */
-    if (IS_64BIT_PROCESS(p)) {
-        error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
-        timelimit.tv_sec = searchblock.timelimit.tv_sec;
-        timelimit.tv_usec = searchblock.timelimit.tv_usec;
-    }
-    else {
-        struct user32_fssearchblock tmp_searchblock;
+       size = IOCPARM_LEN(cmd);
+       if (size > IOCPARM_MAX) return (EINVAL);
  
  
-        error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
-        // munge into 64-bit version
-        searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
-        searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
-        searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
-        searchblock.maxmatches = tmp_searchblock.maxmatches;
-               /* 
-                * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
-                * from a 32 bit long, and tv_usec is already a signed 32 bit int.
-                */
-        timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
-        timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
-        searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
-        searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
-        searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
-        searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
-        searchblock.searchattrs = tmp_searchblock.searchattrs;
-    }
-       if (error)
-               return(error);
+       is64bit = proc_is64bit(p);
  
  
-       /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.  
+       memp = NULL;
+
+       /*
+        * ensure the buffer is large enough for underlying calls
          */
          */
-       if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS || 
-               searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
-               return(EINVAL);
-       
-       /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
-       /* It all has to do into local memory and it's not that big so we might as well  put it all together. */
-       /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
-       /* block.                                                                                             */
-       
-       mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
-                     sizeof(struct attrlist) + sizeof(struct searchstate);
+#ifndef HFSIOC_GETPATH
+typedef char pn_t[MAXPATHLEN];
+#define HFSIOC_GETPATH  _IOWR('h', 13, pn_t)
+#endif
  
  
-       MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
+#ifndef HFS_GETPATH
+#define HFS_GETPATH  IOCBASECMD(HFSIOC_GETPATH)
+#endif
+       if (IOCBASECMD(cmd) == HFS_GETPATH) {
+               /* Round up to MAXPATHLEN regardless of user input */
+               size = MAXPATHLEN;
+       }
  
  
-       /* Now set up the various pointers to the correct place in our newly allocated memory */
  
  
-       searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
-       returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
-       state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
+       if (size > sizeof (stkbuf)) {
+               if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
+               data = memp;
+       } else {
+               data = &stkbuf[0];
+       };
+       
+       if (cmd & IOC_IN) {
+               if (size) {
+                       error = copyin(udata, data, size);
+                       if (error) { 
+                               if (memp) {
+                                       kfree (memp, size);     
+                               }
+                               return error;
+                       }
+               } else {
+                       if (is64bit) {
+                               *(user_addr_t *)data = udata;
+                       }
+                       else {
+                               *(uint32_t *)data = (uint32_t)udata;
+                       }
+               };
+       } else if ((cmd & IOC_OUT) && size) {
+               /*
+                * Zero the buffer so the user always
+                * gets back something deterministic.
+                */
+               bzero(data, size);
+       } else if (cmd & IOC_VOID) {
+               if (is64bit) {
+                       *(user_addr_t *)data = udata;
+               }
+               else {
+                       *(uint32_t *)data = (uint32_t)udata;
+               }
+       }
  
  
-       /* Now copy in the stuff given our local variables. */
+       /* Check to see if it's a generic command */
+       switch (IOCBASECMD(cmd)) {
  
  
-       if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
-               goto freeandexit;
+               case FSCTL_SYNC_VOLUME: {
+                       mount_t mp = vp->v_mount;
+                       int arg = *(uint32_t*)data;
  
  
-       if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
-               goto freeandexit;
+                       /* record vid of vp so we can drop it below. */
+                       uint32_t vvid = vp->v_id;
  
  
-       if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
-               goto freeandexit;
-               
-       if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
-               goto freeandexit;
+                       /*
+                        * Then grab mount_iterref so that we can release the vnode.
+                        * Without this, a thread may call vnode_iterate_prepare then
+                        * get into a deadlock because we've never released the root vp
+                        */
+                       error = mount_iterref (mp, 0);
+                       if (error)  {
+                               break;
+                       }
+                       vnode_put(vp);
  
  
+                       /* issue the sync for this volume */
+                       (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
  
  
-       /*
-        * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
-        * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
-        * The KPI does not provide us the ability to pass in the length of the buffers searchparams1 
-        * and searchparams2. To obviate the need for all searchfs-supporting filesystems to 
-        * validate the user-supplied data offset of the attrreference_t, we'll do it here.
-        */
+                       /* 
+                        * Then release the mount_iterref once we're done syncing; it's not
+                        * needed for the VNOP_IOCTL below
+                        */
+                       mount_iterdrop(mp);
+
+                       if (arg & FSCTL_SYNC_FULLSYNC) {
+                               /* re-obtain vnode iocount on the root vp, if possible */
+                               error = vnode_getwithvid (vp, vvid);
+                               if (error == 0) {
+                                       error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
+                                       vnode_put (vp);
+                               }
+                       }
+                       /* mark the argument VP as having been released */
+                       *arg_vp = NULL;
+               }
+               break;
  
  
-       if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
-               attrreference_t* string_ref;
-               u_int32_t* start_length;
-               user64_size_t param_length;            
+               case FSCTL_SET_PACKAGE_EXTS: {
+                       user_addr_t ext_strings;
+                       uint32_t    num_entries;
+                       uint32_t    max_width;
  
  
-               /* validate searchparams1 */
-               param_length = searchblock.sizeofsearchparams1;                                           
-               /* skip the word that specifies length of the buffer */
-               start_length= (u_int32_t*) searchparams1;
-               start_length= start_length+1;
-               string_ref= (attrreference_t*) start_length;
+                       if (   (is64bit && size != sizeof(user64_package_ext_info))
+                                       || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
  
  
-               /* ensure no negative offsets or too big offsets */
-               if (string_ref->attr_dataoffset < 0 ) {
-                       error = EINVAL;
-                       goto freeandexit;               
-               }
-               if (string_ref->attr_length > MAXPATHLEN) {
-                       error = EINVAL;
-                       goto freeandexit;
-               }
-               
-               /* Check for pointer overflow in the string ref */
-               if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
-                       error = EINVAL;
-                       goto freeandexit;
+                               // either you're 64-bit and passed a 64-bit struct or
+                               // you're 32-bit and passed a 32-bit struct.  otherwise
+                               // it's not ok.
+                               error = EINVAL;
+                               break;
+                       }
+
+                       if (is64bit) {
+                               ext_strings = ((user64_package_ext_info *)data)->strings;
+                               num_entries = ((user64_package_ext_info *)data)->num_entries;
+                               max_width   = ((user64_package_ext_info *)data)->max_width;
+                       } else {
+                               ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
+                               num_entries = ((user32_package_ext_info *)data)->num_entries;
+                               max_width   = ((user32_package_ext_info *)data)->max_width;
+                       }
+                       error = set_package_extensions_table(ext_strings, num_entries, max_width);
                 }
                 }
+               break;
  
  
-               if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
-                       error = EINVAL;
-                       goto freeandexit;
+               /* namespace handlers */        
+               case FSCTL_NAMESPACE_HANDLER_GET: {
+                       error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
                 }
                 }
-               if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
-                       error = EINVAL;
-                       goto freeandexit;
+               break;
+
+               /* Snapshot handlers */
+               case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
+                       error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
+               } 
+               break;
+
+               case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
+                       error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
                 }
                 }
-       }
+               break;  
  
  
-       /* set up the uio structure which will contain the users return buffer */
-       auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, 
-                                                                 &uio_buf[0], sizeof(uio_buf));
-    uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
+               case FSCTL_NAMESPACE_HANDLER_UPDATE: {
+                       uint32_t token, val;
+                       int i;
  
  
-       nameiflags = 0;
-       if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
-       NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, 
-               UIO_USERSPACE, uap->path, ctx);
+                       if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
+                               break;
+                       }
  
  
-       error = namei(&nd);
-       if (error)
-               goto freeandexit;
+                       if (!nspace_is_special_process(p)) {
+                               error = EINVAL;
+                               break;
+                       }
  
  
-       nameidone(&nd);
-       vp = nd.ni_vp; 
+                       token = ((uint32_t *)data)[0];
+                       val   = ((uint32_t *)data)[1];
  
  
-        
-       /*
-        * If searchblock.maxmatches == 0, then skip the search. This has happened 
-        * before and sometimes the underlyning code doesnt deal with it well.
-        */
-        if (searchblock.maxmatches == 0) {
-               nummatches = 0;
-               goto saveandexit;
-        }
+                       lck_mtx_lock(&nspace_handler_lock);
  
  
-       /*
-          Allright, we have everything we need, so lets make that call.
-          
-          We keep special track of the return value from the file system:
-          EAGAIN is an acceptable error condition that shouldn't keep us
-          from copying out any results...
-        */
+                       for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+                               if (nspace_items[i].token == token) {
+                                       break;  /* exit for loop, not case stmt */
+                               }
+                       }
+
+                       if (i >= MAX_NSPACE_ITEMS) {
+                               error = ENOENT;
+                       } else {
+                               //
+                               // if this bit is set, when resolve_nspace_item() times out
+                               // it will loop and go back to sleep.
+                               //
+                               nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
+                       }
+
+                       lck_mtx_unlock(&nspace_handler_lock);
+
+                       if (error) {
+                               printf("nspace-handler-update: did not find token %u\n", token);
+                       }
+               } 
+               break;
+       
+               case FSCTL_NAMESPACE_HANDLER_UNBLOCK: { 
+                       uint32_t token, val;
+                       int i;
+
+                       if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
+                               break;
+                       }
  
  
-       fserror = VNOP_SEARCHFS(vp,
-                                                       searchparams1,
-                                                       searchparams2,
-                                                       &searchblock.searchattrs,
-                                                       (u_long)searchblock.maxmatches,
-                                                       &timelimit,
-                                                       returnattrs,
-                                                       &nummatches,
-                                                       (u_long)uap->scriptcode,
-                                                       (u_long)uap->options,
-                                                       auio,
-                                                       state,
-                                                       ctx);
-               
-saveandexit:
+                       if (!nspace_is_special_process(p)) {
+                               error = EINVAL;
+                               break;
+                       }
  
  
-       vnode_put(vp);
+                       token = ((uint32_t *)data)[0];
+                       val   = ((uint32_t *)data)[1];
  
  
-       /* Now copy out the stuff that needs copying out. That means the number of matches, the
-          search state.  Everything was already put into he return buffer by the vop call. */
+                       lck_mtx_lock(&nspace_handler_lock);
  
  
-       if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
-               goto freeandexit;
+                       for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+                               if (nspace_items[i].token == token) {
+                                       break; /* exit for loop, not case statement */
+                               }
+                       }
  
  
-    if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
-               goto freeandexit;
-       
-       error = fserror;
+                       if (i >= MAX_NSPACE_ITEMS) {
+                               printf("nspace-handler-unblock: did not find token %u\n", token);
+                               error = ENOENT;
+                       } else {
+                               if (val == 0 && nspace_items[i].vp) {
+                                       vnode_lock_spin(nspace_items[i].vp);
+                                       nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
+                                       vnode_unlock(nspace_items[i].vp);
+                               }
  
  
-freeandexit:
+                               nspace_items[i].vp = NULL;
+                               nspace_items[i].arg = NULL;
+                               nspace_items[i].op = 0;
+                               nspace_items[i].vid = 0;
+                               nspace_items[i].flags = NSPACE_ITEM_DONE;
+                               nspace_items[i].token = 0;
  
  
-       FREE(searchparams1,M_TEMP);
+                               wakeup((caddr_t)&(nspace_items[i].vp));
+                       }
  
  
-       return(error);
+                       lck_mtx_unlock(&nspace_handler_lock);
+               } 
+               break;
  
  
+               case FSCTL_NAMESPACE_HANDLER_CANCEL: {
+                       uint32_t token, val;
+                       int i;
  
  
-} /* end of searchfs system call */
+                       if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
+                               break;
+                       }
  
  
+                       if (!nspace_is_special_process(p)) {
+                               error = EINVAL;
+                               break;
+                       }
  
  
-/*
- * Make a filesystem-specific control call:
- */
-/* ARGSUSED */
-static int
-fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
-{
-       int error=0;
-       boolean_t is64bit;
-       u_int size;
-#define STK_PARAMS 128
-       char stkbuf[STK_PARAMS];
-       caddr_t data, memp;
-       vnode_t vp = *arg_vp;
+                       token = ((uint32_t *)data)[0];
+                       val   = ((uint32_t *)data)[1];
  
  
-       size = IOCPARM_LEN(cmd);
-       if (size > IOCPARM_MAX) return (EINVAL);
+                       lck_mtx_lock(&nspace_handler_lock);
  
  
-    is64bit = proc_is64bit(p);
+                       for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+                               if (nspace_items[i].token == token) {
+                                       break;  /* exit for loop, not case stmt */
+                               }
+                       }
  
  
-       memp = NULL;
-       if (size > sizeof (stkbuf)) {
-               if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
-               data = memp;
-       } else {
-               data = &stkbuf[0];
-       };
-       
-       if (cmd & IOC_IN) {
-               if (size) {
-                       error = copyin(udata, data, size);
-                       if (error) goto FSCtl_Exit;
-               } else {
-                   if (is64bit) {
-                       *(user_addr_t *)data = udata;
-                   }
-                   else {
-                       *(uint32_t *)data = (uint32_t)udata;
-                   }
-               };
-       } else if ((cmd & IOC_OUT) && size) {
-               /*
-                * Zero the buffer so the user always
-                * gets back something deterministic.
-                */
-               bzero(data, size);
-       } else if (cmd & IOC_VOID) {
-               if (is64bit) {
-                   *(user_addr_t *)data = udata;
-               }
-               else {
-                   *(uint32_t *)data = (uint32_t)udata;
-               }
-       }
+                       if (i >= MAX_NSPACE_ITEMS) {
+                               printf("nspace-handler-cancel: did not find token %u\n", token);
+                               error = ENOENT;
+                       } else {
+                               if (nspace_items[i].vp) {
+                                       vnode_lock_spin(nspace_items[i].vp);
+                                       nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
+                                       vnode_unlock(nspace_items[i].vp);
+                               }
  
  
-       /* Check to see if it's a generic command */
-       if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) {
-               mount_t mp = vp->v_mount;
-               int arg = *(uint32_t*)data;
-               
-               /* record vid of vp so we can drop it below. */
-               uint32_t vvid = vp->v_id;
+                               nspace_items[i].vp = NULL;                      
+                               nspace_items[i].arg = NULL;                     
+                               nspace_items[i].vid = 0;
+                               nspace_items[i].token = val;
+                               nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
+                               nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;                 
  
  
-               /*
-                * Then grab mount_iterref so that we can release the vnode.
-                * Without this, a thread may call vnode_iterate_prepare then
-                * get into a deadlock because we've never released the root vp
-                */
-               error = mount_iterref (mp, 0);
-               if (error)  {
-                       goto FSCtl_Exit;
-               }
-               vnode_put(vp);
+                               wakeup((caddr_t)&(nspace_items[i].vp));
+                       }
  
  
-               /* issue the sync for this volume */
-               (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
-               
-               /* 
-                * Then release the mount_iterref once we're done syncing; it's not
-                * needed for the VNOP_IOCTL below
-                */
-               mount_iterdrop(mp);
+                       lck_mtx_unlock(&nspace_handler_lock);
+               } 
+               break;
  
  
-               if (arg & FSCTL_SYNC_FULLSYNC) {
-                       /* re-obtain vnode iocount on the root vp, if possible */
-                       error = vnode_getwithvid (vp, vvid);
-                       if (error == 0) {
-                               error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
-                               vnode_put (vp);
+               case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
+                       if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+                               break;
                         }
                         }
-               }
-               /* mark the argument VP as having been released */
-               *arg_vp = NULL;
  
  
-       } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
-           user_addr_t ext_strings;
-           uint32_t    num_entries;
-           uint32_t    max_width;
-           
-           if (   (is64bit && size != sizeof(user64_package_ext_info))
-               || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
+                       // we explicitly do not do the namespace_handler_proc check here
  
  
-               // either you're 64-bit and passed a 64-bit struct or
-               // you're 32-bit and passed a 32-bit struct.  otherwise
-               // it's not ok.
-               error = EINVAL;
-               goto FSCtl_Exit;
-           }
+                       lck_mtx_lock(&nspace_handler_lock);
+                       snapshot_timestamp = ((uint32_t *)data)[0];
+                       wakeup(&nspace_item_idx);
+                       lck_mtx_unlock(&nspace_handler_lock);
+                       printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
  
  
-           if (is64bit) {
-               ext_strings = ((user64_package_ext_info *)data)->strings;
-               num_entries = ((user64_package_ext_info *)data)->num_entries;
-               max_width   = ((user64_package_ext_info *)data)->max_width;
-           } else {
-               ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
-               num_entries = ((user32_package_ext_info *)data)->num_entries;
-               max_width   = ((user32_package_ext_info *)data)->max_width;
-           }
-           
-           error = set_package_extensions_table(ext_strings, num_entries, max_width);
+               } 
+               break;
  
  
-       } else if (IOCBASECMD(cmd) == FSCTL_WAIT_FOR_SYNC) {
-               error = tsleep((caddr_t)&sync_wait_time, PVFS|PCATCH, "sync-wait", 0);
-               if (error == 0) {
-                       *(uint32_t *)data = (uint32_t)sync_wait_time;
+               case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
+               {
+                       if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+                               break;
+                       }
+
+                       lck_mtx_lock(&nspace_handler_lock);
+                       nspace_allow_virtual_devs = ((uint32_t *)data)[0];
+                       lck_mtx_unlock(&nspace_handler_lock);
+                       printf("nspace-snapshot-handler will%s allow events on disk-images\n",
+                                       nspace_allow_virtual_devs ? "" : " NOT");
                         error = 0;
                         error = 0;
-               } else {
-                       error *= -1;
+
                 }
                 }
-                       
-       } else {
-               /* Invoke the filesystem-specific code */
-               error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
-       }
-       
-       
+               break;
+
+               case FSCTL_SET_FSTYPENAME_OVERRIDE: 
+               {       
+                       if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+                               break;
+                       }
+                       if (vp->v_mount) {
+                               mount_lock(vp->v_mount);
+                               if (data[0] != 0) {
+                                       strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
+                                       vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
+                                       if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
+                                               vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
+                                               vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
+                                       }
+                               } else {
+                                       if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
+                                               vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
+                                       }
+                                       vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
+                                       vp->v_mount->fstypename_override[0] = '\0';
+                               }
+                               mount_unlock(vp->v_mount);
+                       }
+               }
+               break;
+               
+               default: {
+                       /* Invoke the filesystem-specific code */
+                       error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
+               }
+
+       } /* end switch stmt */
+
         /*
         /*
-        * Copy any data to user, size was
+        * if no errors, copy any data to user. Size was
          * already set and checked above.
          */
         if (error == 0 && (cmd & IOC_OUT) && size) 
                 error = copyout(data, udata, size);
         
          * already set and checked above.
          */
         if (error == 0 && (cmd & IOC_OUT) && size) 
                 error = copyout(data, udata, size);
         
-FSCtl_Exit:
-       if (memp) kfree(memp, size);
+       if (memp) {
+               kfree(memp, size);
+       }
         
         return error;
  }
         
         return error;
  }
@@ -7000,8 +9612,8 @@ fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
         /* Get the vnode for the file we are getting info on:  */
         nameiflags = 0;
         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
         /* Get the vnode for the file we are getting info on:  */
         nameiflags = 0;
         if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
-       NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE,
-           uap->path, ctx);
+       NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
+              UIO_USERSPACE, uap->path, ctx);
         if ((error = namei(&nd))) goto done;
         vp = nd.ni_vp;
         nameidone(&nd);
         if ((error = namei(&nd))) goto done;
         vp = nd.ni_vp;
         nameidone(&nd);
@@ -7060,26 +9672,6 @@ done:
  }
  /* end of fsctl system call */
  
  }
  /* end of fsctl system call */
  
-/*
- * An in-kernel sync for power management to call.
- */
-__private_extern__ int
-sync_internal(void)
-{
-       int error;
-
-       struct sync_args data;
-
-       int retval[2];
-
-
-       error = sync(current_proc(), &data, &retval[0]);
-
-
-       return (error);
-} /* end of sync_internal call */
-
-
  /*
   *  Retrieve the data of an extended attribute.
   */
  /*
   *  Retrieve the data of an extended attribute.
   */
@@ -7102,7 +9694,7 @@ getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
                 return (EINVAL);
  
         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
                 return (EINVAL);
  
         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
-       NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
+       NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
         if ((error = namei(&nd))) {
                 return (error);
         }
         if ((error = namei(&nd))) {
                 return (error);
         }
@@ -7113,8 +9705,10 @@ getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
                 goto out;
         }
         if (xattr_protected(attrname)) {
                 goto out;
         }
         if (xattr_protected(attrname)) {
-               error = EPERM;
-               goto out;
+               if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
+                       error = EPERM;
+                       goto out;
+               }
         }
         /*
          * the specific check for 0xffffffff is a hack to preserve
         }
         /*
          * the specific check for 0xffffffff is a hack to preserve
@@ -7140,10 +9734,10 @@ getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
         if (uap->size == 0xffffffff || uap->size == (size_t)-1)
                 goto no_uio;
  
         if (uap->size == 0xffffffff || uap->size == (size_t)-1)
                 goto no_uio;
  
-       if (uap->size > (size_t)XATTR_MAXSIZE)
-               uap->size = XATTR_MAXSIZE;
-
         if (uap->value) {
         if (uap->value) {
+               if (uap->size > (size_t)XATTR_MAXSIZE)
+                       uap->size = XATTR_MAXSIZE;
+               
                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
                                             &uio_buf[0], sizeof(uio_buf));
                 uio_addiov(auio, uap->value, uap->size);
                 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
                                             &uio_buf[0], sizeof(uio_buf));
                 uio_addiov(auio, uap->value, uap->size);
@@ -7234,7 +9828,12 @@ setxattr(proc_t p, struct setxattr_args *uap, int *retval)
                 return (EINVAL);
  
         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
                 return (EINVAL);
  
         if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
-               return (error);
+               if (error == EPERM) {
+                       /* error holds (copyinstr(...) != 0), i.e. 1 (== EPERM), after any copyinstr failure, including a name that won't fit in attrname */
+                       return (ENAMETOOLONG);
+               }
+               /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
+               return error;
         }
         if (xattr_protected(attrname))
                 return(EPERM);
         }
         if (xattr_protected(attrname))
                 return(EPERM);
@@ -7243,7 +9842,7 @@ setxattr(proc_t p, struct setxattr_args *uap, int *retval)
         }
  
         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
         }
  
         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
-       NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
+       NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
         if ((error = namei(&nd))) {
                 return (error);
         }
         if ((error = namei(&nd))) {
                 return (error);
         }
@@ -7280,7 +9879,9 @@ fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
         size_t namelen;
         int error;
         char uio_buf[ UIO_SIZEOF(1) ];
         size_t namelen;
         int error;
         char uio_buf[ UIO_SIZEOF(1) ];
+#if CONFIG_FSE
         vfs_context_t ctx = vfs_context_current();
         vfs_context_t ctx = vfs_context_current();
+#endif
  
         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
                 return (EINVAL);
  
         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
                 return (EINVAL);
@@ -7344,7 +9945,7 @@ removexattr(proc_t p, struct removexattr_args *uap, int *retval)
         if (xattr_protected(attrname))
                 return(EPERM);
         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
         if (xattr_protected(attrname))
                 return(EPERM);
         nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
-       NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
+       NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
         if ((error = namei(&nd))) {
                 return (error);
         }
         if ((error = namei(&nd))) {
                 return (error);
         }
@@ -7375,7 +9976,9 @@ fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
         char attrname[XATTR_MAXNAMELEN+1];
         size_t namelen;
         int error;
         char attrname[XATTR_MAXNAMELEN+1];
         size_t namelen;
         int error;
+#if CONFIG_FSE
         vfs_context_t ctx = vfs_context_current();
         vfs_context_t ctx = vfs_context_current();
+#endif
  
         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
                 return (EINVAL);
  
         if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
                 return (EINVAL);
@@ -7428,16 +10031,16 @@ listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
                 return (EINVAL);
  
         if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
                 return (EINVAL);
  
-       nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
-       NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
+       nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
+       NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
         if ((error = namei(&nd))) {
                 return (error);
         }
         vp = nd.ni_vp;
         nameidone(&nd);
         if (uap->namebuf != 0 && uap->bufsize > 0) {
         if ((error = namei(&nd))) {
                 return (error);
         }
         vp = nd.ni_vp;
         nameidone(&nd);
         if (uap->namebuf != 0 && uap->bufsize > 0) {
-               auio = uio_createwithbuffer(1, 0, spacetype, 
-                                                                         UIO_READ, &uio_buf[0], sizeof(uio_buf));
+               auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
+                                           &uio_buf[0], sizeof(uio_buf));
                 uio_addiov(auio, uap->namebuf, uap->bufsize);
         }
  
                 uio_addiov(auio, uap->namebuf, uap->bufsize);
         }
  
@@ -7494,6 +10097,99 @@ flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
         return (error);
  }
  
         return (error);
  }
  
+static int fsgetpath_internal( /* Build the path of object `objid` on the mount found for `volfs_id` into `buf`; returns 0 or an errno value. */
+       vfs_context_t ctx, int volfs_id, uint64_t objid,
+       vm_size_t bufsize, caddr_t buf, int *pathlen) /* *pathlen is written only when 0 is returned */
+{
+       int error;
+       struct mount *mp = NULL;
+       vnode_t vp;
+       int length; /* path length reported by build_path() */
+       int bpflags; /* flags handed to build_path() */
+
+       if (bufsize > PAGE_SIZE) { /* reject output buffers larger than one page */
+               return (EINVAL);
+       }
+
+       if (buf == NULL) { /* caller did not supply a buffer */
+               return (ENOMEM);
+       }
+
+       if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
+               error = ENOTSUP;  /* unexpected failure */
+               return ENOTSUP; /* the store to `error` above is never read */
+       }
+
+unionget: /* re-entered with mp switched to the covered mount (see the union handling below) */
+       if (objid == 2) { /* objid 2 is resolved with VFS_ROOT instead of VFS_VGET */
+               error = VFS_ROOT(mp, &vp, ctx);
+       } else {
+               error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
+       }
+
+       if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
+               /*
+                * If the fileid isn't found and we're in a union
+                * mount volume, then see if the fileid is in the
+                * mounted-on volume.
+                */
+               struct mount *tmp = mp;
+               mp = vnode_mount(tmp->mnt_vnodecovered);
+               vfs_unbusy(tmp);
+               if (vfs_busy(mp, LK_NOWAIT) == 0) /* if busying fails, fall through and return the ENOENT still held in `error` */
+                       goto unionget;
+       } else {
+               vfs_unbusy(mp); /* NOTE(review): presumably pairs with the busy state from mount_lookupby_volfsid(..., 1) or vfs_busy() above -- confirm */
+       }
+
+       if (error) {
+               return error;
+       }
+
+#if CONFIG_MACF
+       error = mac_vnode_check_fsgetpath(ctx, vp);
+       if (error) {
+               vnode_put(vp); /* vp is put on this error path as well as after build_path() below */
+               return error;
+       }
+#endif
+
+       /* Obtain the absolute path to this vnode. */
+       bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0; /* NOTE(review): a nonzero result presumably means "not superuser" -- confirm */
+       bpflags |= BUILDPATH_CHECK_MOVED;
+       error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
+       vnode_put(vp);
+
+       if (error) {
+               goto out;
+       }
+
+       AUDIT_ARG(text, buf);
+
+       if (kdebug_enable) { /* hand the path (or its tail) to kdebug_lookup_gen_events() */
+               long dbg_parms[NUMPARMS];
+                int  dbg_namelen;
+
+                dbg_namelen = (int)sizeof(dbg_parms);
+
+        if (length < dbg_namelen) { /* path fits: copy it and zero the remainder */
+                       memcpy((char *)dbg_parms, buf, length);
+                       memset((char *)dbg_parms + length, 0, dbg_namelen - length);
+
+                       dbg_namelen = length;
+               } else { /* path does not fit: keep only its last dbg_namelen bytes */
+                       memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
+               }
+
+               kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE); /* NOTE(review): vp was already vnode_put() above; presumably used only as an identifier -- confirm */
+       }
+
+       *pathlen = (user_ssize_t)length; /* may be superseded by error; the value is stored into an int despite the user_ssize_t cast */
+
+out:
+       return (error);
+}
+
  /*
   * Obtain the full pathname of a file system object by id.
   *
  /*
   * Obtain the full pathname of a file system object by id.
   *
@@ -7503,12 +10199,9 @@ __private_extern__
  int
  fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
  {
  int
  fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
  {
-       vnode_t vp;
-       struct mount *mp = NULL;
         vfs_context_t ctx = vfs_context_current();
         fsid_t fsid;
         char *realpath;
         vfs_context_t ctx = vfs_context_current();
         fsid_t fsid;
         char *realpath;
-       int bpflags;
         int length;
         int error;
  
         int length;
         int error;
  
@@ -7518,6 +10211,7 @@ fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
         AUDIT_ARG(value32, fsid.val[0]);
         AUDIT_ARG(value64, uap->objid);
         /* Restrict output buffer size for now. */
         AUDIT_ARG(value32, fsid.val[0]);
         AUDIT_ARG(value64, uap->objid);
         /* Restrict output buffer size for now. */
+       
         if (uap->bufsize > PAGE_SIZE) {
                 return (EINVAL);
         }       
         if (uap->bufsize > PAGE_SIZE) {
                 return (EINVAL);
         }       
@@ -7525,29 +10219,15 @@ fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
         if (realpath == NULL) {
                 return (ENOMEM);
         }
         if (realpath == NULL) {
                 return (ENOMEM);
         }
-       /* Find the target mountpoint. */
-       if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) {
-               error = ENOTSUP;  /* unexpected failure */
-               goto out;
-       }
-       /* Find the target vnode. */
-       if (uap->objid == 2) {
-               error = VFS_ROOT(mp, &vp, ctx);
-       } else {
-               error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx);
-       }
-       vfs_unbusy(mp);
-       if (error) {
-               goto out;
-       }
-       /* Obtain the absolute path to this vnode. */
-       bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
-       error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
-       vnode_put(vp);
+
+       error = fsgetpath_internal(
+               ctx, fsid.val[0], uap->objid, 
+               uap->bufsize, realpath, &length);
+
         if (error) {
                 goto out;
         }
         if (error) {
                 goto out;
         }
-       AUDIT_ARG(text, realpath);
+       
         error = copyout((caddr_t)realpath, uap->buf, length);
  
         *retval = (user_ssize_t)length; /* may be superseded by error */
         error = copyout((caddr_t)realpath, uap->buf, length);
  
         *retval = (user_ssize_t)length; /* may be superseded by error */
@@ -7589,7 +10269,11 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
                 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
                 sfs.f_fsid = sfsp->f_fsid;
                 sfs.f_owner = sfsp->f_owner;
                 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
                 sfs.f_fsid = sfsp->f_fsid;
                 sfs.f_owner = sfsp->f_owner;
-               strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
+               if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
+                       strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
+               } else {
+                       strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
+               }
                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
  
                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
  
@@ -7662,7 +10346,11 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
                 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
                 sfs.f_fsid = sfsp->f_fsid;
                 sfs.f_owner = sfsp->f_owner;
                 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
                 sfs.f_fsid = sfsp->f_fsid;
                 sfs.f_owner = sfsp->f_owner;
-               strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
+               if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
+                       strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
+               } else {
+                       strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
+               }
                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
  
                 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
                 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
  
@@ -7835,3 +10523,32 @@ void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
         usbp->st_qspare[0] = sbp->st_qspare[0];
         usbp->st_qspare[1] = sbp->st_qspare[1];
  }
         usbp->st_qspare[0] = sbp->st_qspare[0];
         usbp->st_qspare[1] = sbp->st_qspare[1];
  }
+
+/*
+ * Purge buffer cache for simulating cold starts
+ */
+static int vnode_purge_callback(struct vnode *vp, __unused void *cargs) /* per-vnode callback passed to vnode_iterate() by vfs_purge_callback() */
+{
+       ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE); /* covers offsets 0..ubc_getsize(vp); the return value is ignored */
+
+       return VNODE_RETURNED; /* always the same result, whatever ubc_msync() reported */
+}
+
+static int vfs_purge_callback(mount_t mp, __unused void * arg) /* per-mount callback passed to vfs_iterate() by vfs_purge() */
+{
+       vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL); /* run vnode_purge_callback over this mount's vnodes; the result is ignored */
+
+       return VFS_RETURNED;
+}
+
+int
+vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval) /* all three arguments are unused */
+{
+       if (!kauth_cred_issuser(kauth_cred_get())) /* refuse callers whose credential fails the superuser check */
+               return EPERM;
+
+       vfs_iterate(0/* flags */, vfs_purge_callback, NULL); /* run vfs_purge_callback via vfs_iterate(); the result is ignored */
+
+       return 0; /* once the credential check passes, 0 is always returned */
+}
+