2  * Copyright (c) 1995-2020 Apple Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  29  * Copyright (c) 1989, 1993 
  30  *      The Regents of the University of California.  All rights reserved. 
  31  * (c) UNIX System Laboratories, Inc. 
  32  * All or some portions of this file are derived from material licensed 
  33  * to the University of California by American Telephone and Telegraph 
  34  * Co. or Unix System Laboratories, Inc. and are reproduced herein with 
  35  * the permission of UNIX System Laboratories, Inc. 
  37  * Redistribution and use in source and binary forms, with or without 
  38  * modification, are permitted provided that the following conditions 
  40  * 1. Redistributions of source code must retain the above copyright 
  41  *    notice, this list of conditions and the following disclaimer. 
  42  * 2. Redistributions in binary form must reproduce the above copyright 
  43  *    notice, this list of conditions and the following disclaimer in the 
  44  *    documentation and/or other materials provided with the distribution. 
  45  * 3. All advertising materials mentioning features or use of this software 
  46  *    must display the following acknowledgement: 
  47  *      This product includes software developed by the University of 
  48  *      California, Berkeley and its contributors. 
  49  * 4. Neither the name of the University nor the names of its contributors 
  50  *    may be used to endorse or promote products derived from this software 
  51  *    without specific prior written permission. 
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  65  *      @(#)vfs_syscalls.c      8.41 (Berkeley) 6/15/95 
  68  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 
  69  * support for mandatory and extensible security protections.  This notice 
  70  * is included in support of clause 2.2 (b) of the Apple Public License, 
  74 #include <sys/param.h> 
  75 #include <sys/systm.h> 
  76 #include <sys/namei.h> 
  77 #include <sys/filedesc.h> 
  78 #include <sys/kernel.h> 
  79 #include <sys/file_internal.h> 
  81 #include <sys/vnode_internal.h> 
  82 #include <sys/mount_internal.h> 
  83 #include <sys/proc_internal.h> 
  84 #include <sys/kauth.h> 
  85 #include <sys/uio_internal.h> 
  86 #include <kern/kalloc.h> 
  88 #include <sys/dirent.h> 
  90 #include <sys/sysctl.h> 
  92 #include <sys/quota.h> 
  93 #include <sys/kdebug.h> 
  94 #include <sys/fsevents.h> 
  95 #include <sys/imgsrc.h> 
  96 #include <sys/sysproto.h> 
  97 #include <sys/sysctl.h> 
  98 #include <sys/xattr.h> 
  99 #include <sys/fcntl.h> 
 100 #include <sys/fsctl.h> 
 101 #include <sys/ubc_internal.h> 
 102 #include <sys/disk.h> 
 103 #include <sys/content_protection.h> 
 104 #include <sys/clonefile.h> 
 105 #include <sys/snapshot.h> 
 106 #include <sys/priv.h> 
 107 #include <sys/fsgetpath.h> 
 108 #include <machine/cons.h> 
 109 #include <machine/limits.h> 
 110 #include <miscfs/specfs/specdev.h> 
 112 #include <vfs/vfs_disk_conditioner.h> 
 114 #include <security/audit/audit.h> 
 115 #include <bsm/audit_kevents.h> 
 117 #include <mach/mach_types.h> 
 118 #include <kern/kern_types.h> 
 119 #include <kern/kalloc.h> 
 120 #include <kern/task.h> 
 122 #include <vm/vm_pageout.h> 
 123 #include <vm/vm_protos.h> 
 125 #include <libkern/OSAtomic.h> 
 126 #include <os/atomic_private.h> 
 127 #include <pexpert/pexpert.h> 
 128 #include <IOKit/IOBSD.h> 
 131 #include <kern/host.h> 
 132 #include <kern/ipc_misc.h> 
 133 #include <mach/host_priv.h> 
 134 #include <mach/vfs_nspace.h> 
 137 #include <nfs/nfs_conf.h> 
 140 #include <miscfs/routefs/routefs.h> 
 144 #include <security/mac.h> 
 145 #include <security/mac_framework.h> 
 149 #define GET_PATH(x) \ 
 150         ((x) = get_pathbuff()) 
 151 #define RELEASE_PATH(x) \ 
 154 #define GET_PATH(x)     \ 
 155         ((x) = zalloc(ZV_NAMEI)) 
 156 #define RELEASE_PATH(x) \ 
 158 #endif /* CONFIG_FSE */ 
 160 #ifndef HFS_GET_BOOT_INFO 
 161 #define HFS_GET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00004) 
 164 #ifndef HFS_SET_BOOT_INFO 
 165 #define HFS_SET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00005) 
 168 #ifndef APFSIOC_REVERT_TO_SNAPSHOT 
 169 #define APFSIOC_REVERT_TO_SNAPSHOT  _IOW('J', 1, u_int64_t) 
 173  * If you need accounting for KM_FD_VN_DATA consider using 
 174  * ZONE_VIEW_DEFINE to define a zone view. 
 176 #define KM_FD_VN_DATA KHEAP_DEFAULT 
 178 extern void disk_conditioner_unmount(mount_t mp
); 
 180 /* struct for checkdirs iteration */ 
 185 /* callback  for checkdirs iteration */ 
 186 static int checkdirs_callback(proc_t p
, void * arg
); 
 188 static int change_dir(struct nameidata 
*ndp
, vfs_context_t ctx
); 
 189 static int checkdirs(vnode_t olddp
, vfs_context_t ctx
); 
 190 void enablequotas(struct mount 
*mp
, vfs_context_t ctx
); 
 191 static int getfsstat_callback(mount_t mp
, void * arg
); 
 192 static int getutimes(user_addr_t usrtvp
, struct timespec 
*tsp
); 
 193 static int setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec 
*ts
, int nullflag
); 
 194 static int sync_callback(mount_t
, void *); 
 195 static int munge_statfs(struct mount 
*mp
, struct vfsstatfs 
*sfsp
, 
 196     user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
, 
 197     boolean_t partial_copy
); 
 198 static int fsync_common(proc_t p
, struct fsync_args 
*uap
, int flags
); 
 199 static int mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
, 
 200     struct componentname 
*cnp
, user_addr_t fsmountargs
, 
 201     int flags
, uint32_t internal_flags
, char *labelstr
, boolean_t kernelmount
, 
 203 void vfs_notify_mount(vnode_t pdvp
); 
 205 int prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname 
*cnp
, const char *fsname
, boolean_t skip_auth
); 
 207 struct fd_vn_data 
* fg_vn_data_alloc(void); 
 210  * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename} 
 211  * Concurrent lookups (or lookups by ids) on hard links can cause the 
 212  * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter 
 213  * does) to return ENOENT as the path cannot be returned from the name cache 
 214  * alone. We have no option but to retry and hope to get one namei->reverse path 
 215  * generation done without an intervening lookup, lookup by id on the hard link 
 216  * item. This is only an issue for MAC hooks which cannot reenter the filesystem 
 217  * which currently are the MAC hooks for rename, unlink and rmdir. 
 219 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024 
 221 /* Max retry limit for rename due to vnode recycling. */ 
 222 #define MAX_RENAME_ERECYCLE_RETRIES 1024 
 224 static int rmdirat_internal(vfs_context_t
, int, user_addr_t
, enum uio_seg
, 
 227 static int fsgetpath_internal(vfs_context_t
, int, uint64_t, vm_size_t
, caddr_t
, uint32_t options
, int *); 
 229 #ifdef CONFIG_IMGSRC_ACCESS 
 230 static int authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t 
*devvpp
, vfs_context_t ctx
); 
 231 static int place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
); 
 232 static void undo_place_on_covered_vp(mount_t mp
, vnode_t vp
); 
 233 static int mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
); 
 234 static void mount_end_update(mount_t mp
); 
 235 static int relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, struct componentname 
*cnp
, const char *fsname
, vfs_context_t ctx
, boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
); 
 236 #endif /* CONFIG_IMGSRC_ACCESS */ 
 238 #if CONFIG_LOCKERBOOT 
 239 int mount_locker_protoboot(const char *fsname
, const char *mntpoint
, 
 240     const char *pbdevpath
); 
 244 #if CONFIG_MNT_ROOTSNAP 
 245 static int __attribute__ ((noinline
)) snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
); 
 247 static int __attribute__ ((noinline
)) snapshot_root(int dirfd
, user_addr_t name
, uint32_t flags
, vfs_context_t ctx
) __attribute__((unused
)); 
 251 int sync_internal(void); 
 254 int unlink1(vfs_context_t
, vnode_t
, user_addr_t
, enum uio_seg
, int); 
 256 static LCK_GRP_DECLARE(fd_vn_lck_grp
, "fd_vnode_data"); 
 257 static LCK_ATTR_DECLARE(fd_vn_lck_attr
, 0, 0); 
 259 /* vars for sync mutex */ 
 260 static LCK_GRP_DECLARE(sync_mtx_lck_grp
, "sync thread"); 
 261 static LCK_MTX_DECLARE(sync_mtx_lck
, &sync_mtx_lck_grp
); 
 263 extern lck_rw_t rootvnode_rw_lock
; 
 266  * incremented each time a mount or unmount operation occurs 
 267  * used to invalidate the cached value of the rootvp in the 
 268  * mount structure utilized by cache_lookup_path 
 270 uint32_t mount_generation 
= 0; 
 272 /* counts number of mount and unmount operations */ 
 273 unsigned int vfs_nummntops 
= 0; 
 275 /* system-wide, per-boot unique mount ID */ 
 276 static _Atomic 
uint64_t mount_unique_id 
= 1; 
 278 extern const struct fileops vnops
; 
 279 #if CONFIG_APPLEDOUBLE 
 280 extern errno_t 
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *); 
 281 #endif /* CONFIG_APPLEDOUBLE */ 
 284  * Virtual File System System Calls 
 287 #if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS 
 289  * Private in-kernel mounting spi (NFS only, not exported) 
 293 vfs_iskernelmount(mount_t mp
) 
 295         return (mp
->mnt_kern_flag 
& MNTK_KERNEL_MOUNT
) ? TRUE 
: FALSE
; 
 300 kernel_mount(char *fstype
, vnode_t pvp
, vnode_t vp
, const char *path
, 
 301     void *data
, __unused 
size_t datalen
, int syscall_flags
, uint32_t kern_flags
, vfs_context_t ctx
) 
 307         NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW 
| AUDITVNPATH1 
| WANTPARENT
, 
 308             UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
); 
 311          * Get the vnode to be covered if it's not supplied 
 316                         if (kern_flags 
& (KERNEL_MOUNT_SNAPSHOT 
| KERNEL_MOUNT_VOLBYROLE_MASK
)) { 
 317                                 printf("failed to locate mount-on path: %s ", path
); 
 325                 char *pnbuf 
= CAST_DOWN(char *, path
); 
 327                 nd
.ni_cnd
.cn_pnbuf 
= pnbuf
; 
 328                 nd
.ni_cnd
.cn_pnlen 
= (int)(strlen(pnbuf
) + 1); 
 332         error 
= mount_common(fstype
, pvp
, vp
, &nd
.ni_cnd
, CAST_USER_ADDR_T(data
), 
 333             syscall_flags
, kern_flags
, NULL
, TRUE
, ctx
); 
 343 #endif /* CONFIG_NFS_CLIENT || DEVFS */ 
 346  * Mount a file system. 
 350 mount(proc_t p
, struct mount_args 
*uap
, __unused 
int32_t *retval
) 
 352         struct __mac_mount_args muap
; 
 354         muap
.type 
= uap
->type
; 
 355         muap
.path 
= uap
->path
; 
 356         muap
.flags 
= uap
->flags
; 
 357         muap
.data 
= uap
->data
; 
 358         muap
.mac_p 
= USER_ADDR_NULL
; 
 359         return __mac_mount(p
, &muap
, retval
); 
 363 fmount(__unused proc_t p
, struct fmount_args 
*uap
, __unused 
int32_t *retval
) 
 365         struct componentname    cn
; 
 366         vfs_context_t           ctx 
= vfs_context_current(); 
 369         int                     flags 
= uap
->flags
; 
 370         char                    fstypename
[MFSNAMELEN
]; 
 371         char                    *labelstr 
= NULL
; /* regular mount call always sets it to NULL for __mac_mount() */ 
 375         AUDIT_ARG(fd
, uap
->fd
); 
 376         AUDIT_ARG(fflags
, flags
); 
 377         /* fstypename will get audited by mount_common */ 
 379         /* Sanity check the flags */ 
 380         if (flags 
& (MNT_IMGSRC_BY_INDEX 
| MNT_ROOTFS
)) { 
 384         if (flags 
& MNT_UNION
) { 
 388         error 
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
); 
 393         if ((error 
= file_vnode(uap
->fd
, &vp
)) != 0) { 
 397         if ((error 
= vnode_getwithref(vp
)) != 0) { 
 402         pvp 
= vnode_getparent(vp
); 
 409         memset(&cn
, 0, sizeof(struct componentname
)); 
 410         cn
.cn_pnbuf 
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
); 
 411         cn
.cn_pnlen 
= MAXPATHLEN
; 
 413         if ((error 
= vn_getpath(vp
, cn
.cn_pnbuf
, &cn
.cn_pnlen
)) != 0) { 
 414                 zfree(ZV_NAMEI
, cn
.cn_pnbuf
); 
 421         error 
= mount_common(fstypename
, pvp
, vp
, &cn
, uap
->data
, flags
, 0, labelstr
, FALSE
, ctx
); 
 423         zfree(ZV_NAMEI
, cn
.cn_pnbuf
); 
 432 vfs_notify_mount(vnode_t pdvp
) 
 434         vfs_event_signal(NULL
, VQ_MOUNT
, (intptr_t)NULL
); 
 435         lock_vnode_and_post(pdvp
, NOTE_WRITE
); 
 440  *      Mount a file system taking into account MAC label behavior. 
 441  *      See mount(2) man page for more information 
 443  * Parameters:    p                        Process requesting the mount 
 444  *                uap                      User argument descriptor (see below) 
 447  * Indirect:      uap->type                Filesystem type 
 448  *                uap->path                Path to mount 
 449  *                uap->data                Mount arguments 
 450  *                uap->mac_p               MAC info 
 451  *                uap->flags               Mount flags 
 457 boolean_t root_fs_upgrade_try 
= FALSE
; 
 460 __mac_mount(struct proc 
*p
, register struct __mac_mount_args 
*uap
, __unused 
int32_t *retval
) 
 464         int need_nameidone 
= 0; 
 465         vfs_context_t ctx 
= vfs_context_current(); 
 466         char fstypename
[MFSNAMELEN
]; 
 469         char *labelstr 
= NULL
; 
 471         int flags 
= uap
->flags
; 
 473 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF 
 474         boolean_t is_64bit 
= IS_64BIT_PROCESS(p
); 
 479          * Get the fs type name from user space 
 481         error 
= copyinstr(uap
->type
, fstypename
, MFSNAMELEN
, &dummy
); 
 487          * Get the vnode to be covered 
 489         NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW 
| AUDITVNPATH1 
| WANTPARENT
, 
 490             UIO_USERSPACE
, uap
->path
, ctx
); 
 499 #ifdef CONFIG_IMGSRC_ACCESS 
 500         /* Mounting image source cannot be batched with other operations */ 
 501         if (flags 
== MNT_IMGSRC_BY_INDEX
) { 
 502                 error 
= relocate_imageboot_source(pvp
, vp
, &nd
.ni_cnd
, fstypename
, 
 503                     ctx
, is_64bit
, uap
->data
, (flags 
== MNT_IMGSRC_BY_INDEX
)); 
 506 #endif /* CONFIG_IMGSRC_ACCESS */ 
 510          * Get the label string (if any) from user space 
 512         if (uap
->mac_p 
!= USER_ADDR_NULL
) { 
 517                         struct user64_mac mac64
; 
 518                         error 
= copyin(uap
->mac_p
, &mac64
, sizeof(mac64
)); 
 519                         mac
.m_buflen 
= (user_size_t
)mac64
.m_buflen
; 
 520                         mac
.m_string 
= (user_addr_t
)mac64
.m_string
; 
 522                         struct user32_mac mac32
; 
 523                         error 
= copyin(uap
->mac_p
, &mac32
, sizeof(mac32
)); 
 524                         mac
.m_buflen 
= mac32
.m_buflen
; 
 525                         mac
.m_string 
= mac32
.m_string
; 
 530                 if ((mac
.m_buflen 
> MAC_MAX_LABEL_BUF_LEN
) || 
 531                     (mac
.m_buflen 
< 2)) { 
 535                 labelsz 
= mac
.m_buflen
; 
 536                 labelstr 
= kheap_alloc(KHEAP_TEMP
, labelsz
, Z_WAITOK
); 
 537                 error 
= copyinstr(mac
.m_string
, labelstr
, mac
.m_buflen
, &ulen
); 
 541                 AUDIT_ARG(mac_string
, labelstr
); 
 543 #endif /* CONFIG_MACF */ 
 545         AUDIT_ARG(fflags
, flags
); 
 548         if (flags 
& MNT_UNION
) { 
 549                 /* No union mounts on release kernels */ 
 555         if ((vp
->v_flag 
& VROOT
) && 
 556             (vp
->v_mount
->mnt_flag 
& MNT_ROOTFS
)) { 
 557                 if (!(flags 
& MNT_UNION
)) { 
 561                          * For a union mount on '/', treat it as fresh 
 562                          * mount instead of update. 
 563                          * Otherwise, union mouting on '/' used to panic the 
 564                          * system before, since mnt_vnodecovered was found to 
 565                          * be NULL for '/' which is required for unionlookup 
 566                          * after it gets ENOENT on union mount. 
 568                         flags 
= (flags 
& ~(MNT_UPDATE
)); 
 572                 if ((flags 
& MNT_RDONLY
) == 0) { 
 573                         /* Release kernels are not allowed to mount "/" as rw */ 
 579                  * See 7392553 for more details on why this check exists. 
 580                  * Suffice to say: If this check is ON and something tries 
 581                  * to mount the rootFS RW, we'll turn off the codesign 
 582                  * bitmap optimization. 
 584 #if CHECK_CS_VALIDATION_BITMAP 
 585                 if ((flags 
& MNT_RDONLY
) == 0) { 
 586                         root_fs_upgrade_try 
= TRUE
; 
 591         error 
= mount_common(fstypename
, pvp
, vp
, &nd
.ni_cnd
, uap
->data
, flags
, 0, 
 592             labelstr
, FALSE
, ctx
); 
 597         kheap_free(KHEAP_DEFAULT
, labelstr
, labelsz
); 
 598 #endif /* CONFIG_MACF */ 
 606         if (need_nameidone
) { 
 614  * common mount implementation (final stage of mounting) 
  617  *  fstypename  file system type (i.e. its vfs name) 
  618  *  pvp         parent of covered vnode 
  620  *  cnp         component name (i.e. path) of covered vnode 
 621  *  flags       generic mount flags 
 622  *  fsmountargs file system specific data 
 623  *  labelstr    optional MAC label 
 624  *  kernelmount TRUE for mounts initiated from inside the kernel 
 625  *  ctx         caller's context 
 628 mount_common(char *fstypename
, vnode_t pvp
, vnode_t vp
, 
 629     struct componentname 
*cnp
, user_addr_t fsmountargs
, int flags
, uint32_t internal_flags
, 
 630     char *labelstr
, boolean_t kernelmount
, vfs_context_t ctx
) 
 633 #pragma unused(labelstr) 
 635         struct vnode 
*devvp 
= NULLVP
; 
 636         struct vnode 
*device_vnode 
= NULLVP
; 
 641         struct vfstable 
*vfsp 
= (struct vfstable 
*)0; 
 642         struct proc 
*p 
= vfs_context_proc(ctx
); 
 644         bool flag_set 
= false; 
 645         user_addr_t devpath 
= USER_ADDR_NULL
; 
 648         boolean_t vfsp_ref 
= FALSE
; 
 649         boolean_t is_rwlock_locked 
= FALSE
; 
 650         boolean_t did_rele 
= FALSE
; 
 651         boolean_t have_usecount 
= FALSE
; 
 652         boolean_t did_set_lmount 
= FALSE
; 
 654 #if CONFIG_ROSV_STARTUP || CONFIG_MOUNT_VM || CONFIG_BASESYSTEMROOT 
 655         /* Check for mutually-exclusive flag bits */ 
 656         uint32_t checkflags 
= (internal_flags 
& (KERNEL_MOUNT_VOLBYROLE_MASK 
| KERNEL_MOUNT_BASESYSTEMROOT
)); 
 658         while (checkflags 
!= 0) { 
 659                 checkflags 
&= (checkflags 
- 1); 
 664                 //not allowed to request multiple mount-by-role flags 
 671          * Process an update for an existing mount 
 673         if (flags 
& MNT_UPDATE
) { 
 674                 if ((vp
->v_flag 
& VROOT
) == 0) { 
 680                 /* if unmount or mount in progress, return error */ 
 682                 if (mp
->mnt_lflag 
& (MNT_LUNMOUNT 
| MNT_LMOUNT
)) { 
 687                 mp
->mnt_lflag 
|= MNT_LMOUNT
; 
 688                 did_set_lmount 
= TRUE
; 
 690                 lck_rw_lock_exclusive(&mp
->mnt_rwlock
); 
 691                 is_rwlock_locked 
= TRUE
; 
 693                  * We only allow the filesystem to be reloaded if it 
 694                  * is currently mounted read-only. 
 696                 if ((flags 
& MNT_RELOAD
) && 
 697                     ((mp
->mnt_flag 
& MNT_RDONLY
) == 0)) { 
 703                  * If content protection is enabled, update mounts are not 
 704                  * allowed to turn it off. 
 706                 if ((mp
->mnt_flag 
& MNT_CPROTECT
) && 
 707                     ((flags 
& MNT_CPROTECT
) == 0)) { 
 713                  * can't turn off MNT_REMOVABLE either but it may be an unexpected 
 714                  * failure to return an error for this so we'll just silently 
 715                  * add it if it is not passed in. 
 717                 if ((mp
->mnt_flag 
& MNT_REMOVABLE
) && 
 718                     ((flags 
& MNT_REMOVABLE
) == 0)) { 
 719                         flags 
|= MNT_REMOVABLE
; 
 722                 /* Can't downgrade the backer of the root FS */ 
 723                 if ((mp
->mnt_kern_flag 
& MNTK_BACKS_ROOT
) && 
 724                     (!vfs_isrdonly(mp
)) && (flags 
& MNT_RDONLY
)) { 
 730                  * Only root, or the user that did the original mount is 
 731                  * permitted to update it. 
 733                 if (mp
->mnt_vfsstat
.f_owner 
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) && 
 734                     (error 
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) { 
 738                 error 
= mac_mount_check_remount(ctx
, mp
); 
 744                  * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, 
 745                  * and MNT_NOEXEC if mount point is already MNT_NOEXEC. 
 747                 if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) { 
 748                         flags 
|= MNT_NOSUID 
| MNT_NODEV
; 
 749                         if (mp
->mnt_flag 
& MNT_NOEXEC
) { 
 758                 mp
->mnt_flag 
|= flags 
& (MNT_RELOAD 
| MNT_FORCE 
| MNT_UPDATE
); 
 760                 vfsp 
= mp
->mnt_vtable
; 
 765          * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and 
 766          * MNT_NOEXEC if mount point is already MNT_NOEXEC. 
 768         if ((!kernelmount
) && suser(vfs_context_ucred(ctx
), NULL
)) { 
 769                 flags 
|= MNT_NOSUID 
| MNT_NODEV
; 
 770                 if (vp
->v_mount
->mnt_flag 
& MNT_NOEXEC
) { 
 775         /* XXXAUDIT: Should we capture the type on the error path as well? */ 
 776         AUDIT_ARG(text
, fstypename
); 
 778         for (vfsp 
= vfsconf
; vfsp
; vfsp 
= vfsp
->vfc_next
) { 
 779                 if (!strncmp(vfsp
->vfc_name
, fstypename
, MFSNAMELEN
)) { 
 780                         vfsp
->vfc_refcount
++; 
 792          * VFC_VFSLOCALARGS is not currently supported for kernel mounts, 
 793          * except in ROSV configs and for the initial BaseSystem root. 
 795         if (kernelmount 
&& (vfsp
->vfc_vfsflags 
& VFC_VFSLOCALARGS
) && 
 796             ((internal_flags 
& KERNEL_MOUNT_VOLBYROLE_MASK
) == 0) && 
 797             ((internal_flags 
& KERNEL_MOUNT_BASESYSTEMROOT
) == 0)) { 
 798                 error 
= EINVAL
;  /* unsupported request */ 
 802         error 
= prepare_coveredvp(vp
, ctx
, cnp
, fstypename
, ((internal_flags 
& KERNEL_MOUNT_NOAUTH
) != 0)); 
 808          * Allocate and initialize the filesystem (mount_t) 
 810         mp 
= zalloc_flags(mount_zone
, Z_WAITOK 
| Z_ZERO
); 
 813         /* Initialize the default IO constraints */ 
 814         mp
->mnt_maxreadcnt 
= mp
->mnt_maxwritecnt 
= MAXPHYS
; 
 815         mp
->mnt_segreadcnt 
= mp
->mnt_segwritecnt 
= 32; 
 816         mp
->mnt_maxsegreadsize 
= mp
->mnt_maxreadcnt
; 
 817         mp
->mnt_maxsegwritesize 
= mp
->mnt_maxwritecnt
; 
 818         mp
->mnt_devblocksize 
= DEV_BSIZE
; 
 819         mp
->mnt_alignmentmask 
= PAGE_MASK
; 
 820         mp
->mnt_ioqueue_depth 
= MNT_DEFAULT_IOQUEUE_DEPTH
; 
 823         mp
->mnt_realrootvp 
= NULLVP
; 
 824         mp
->mnt_authcache_ttl 
= CACHED_LOOKUP_RIGHT_TTL
; 
 826         mp
->mnt_lflag 
|= MNT_LMOUNT
; 
 827         did_set_lmount 
= TRUE
; 
 829         TAILQ_INIT(&mp
->mnt_vnodelist
); 
 830         TAILQ_INIT(&mp
->mnt_workerqueue
); 
 831         TAILQ_INIT(&mp
->mnt_newvnodes
); 
 833         lck_rw_lock_exclusive(&mp
->mnt_rwlock
); 
 834         is_rwlock_locked 
= TRUE
; 
 835         mp
->mnt_op 
= vfsp
->vfc_vfsops
; 
 836         mp
->mnt_vtable 
= vfsp
; 
 837         //mp->mnt_stat.f_type = vfsp->vfc_typenum; 
 838         mp
->mnt_flag 
|= vfsp
->vfc_flags 
& MNT_VISFLAGMASK
; 
 839         strlcpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
); 
 841                 int pathlen 
= MAXPATHLEN
; 
 843                 if (vn_getpath_ext(vp
, pvp
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
)) { 
 844                         strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
); 
 847         mp
->mnt_vnodecovered 
= vp
; 
 848         mp
->mnt_vfsstat
.f_owner 
= kauth_cred_getuid(vfs_context_ucred(ctx
)); 
 849         mp
->mnt_throttle_mask 
= LOWPRI_MAX_NUM_DEV 
- 1; 
 850         mp
->mnt_devbsdunit 
= 0; 
 851         mp
->mnt_mount_id 
= os_atomic_inc_orig(&mount_unique_id
, relaxed
); 
 853         /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */ 
 854         vfs_setowner(mp
, KAUTH_UID_NONE
, KAUTH_GID_NONE
); 
 856 #if CONFIG_NFS_CLIENT || DEVFS || ROUTEFS 
 858                 mp
->mnt_kern_flag 
|= MNTK_KERNEL_MOUNT
; 
 860         if ((internal_flags 
& KERNEL_MOUNT_PERMIT_UNMOUNT
) != 0) { 
 861                 mp
->mnt_kern_flag 
|= MNTK_PERMIT_UNMOUNT
; 
 863 #endif /* CONFIG_NFS_CLIENT || DEVFS */ 
 865         if (KERNEL_MOUNT_DEVFS 
& internal_flags
) { 
 866                 // kernel mounted devfs 
 867                 mp
->mnt_kern_flag 
|= MNTK_SYSTEM
; 
 873          * Set the mount level flags. 
 875         if (flags 
& MNT_RDONLY
) { 
 876                 mp
->mnt_flag 
|= MNT_RDONLY
; 
 877         } else if (mp
->mnt_flag 
& MNT_RDONLY
) { 
 878                 // disallow read/write upgrades of file systems that 
 879                 // had the TYPENAME_OVERRIDE feature set. 
 880                 if (mp
->mnt_kern_flag 
& MNTK_TYPENAME_OVERRIDE
) { 
 884                 mp
->mnt_kern_flag 
|= MNTK_WANTRDWR
; 
 886         mp
->mnt_flag 
&= ~(MNT_NOSUID 
| MNT_NOEXEC 
| MNT_NODEV 
| 
 887             MNT_SYNCHRONOUS 
| MNT_UNION 
| MNT_ASYNC 
| 
 888             MNT_UNKNOWNPERMISSIONS 
| MNT_DONTBROWSE 
| 
 889             MNT_AUTOMOUNTED 
| MNT_DEFWRITE 
| MNT_NOATIME 
| MNT_STRICTATIME 
| 
 890             MNT_QUARANTINE 
| MNT_CPROTECT
); 
 895          * On release builds of iOS based platforms, always enforce NOSUID on 
 896          * all mounts. We do this here because we can catch update mounts as well as 
 897          * non-update mounts in this case. 
 899         mp
->mnt_flag 
|= (MNT_NOSUID
); 
 903         mp
->mnt_flag 
|= flags 
& (MNT_NOSUID 
| MNT_NOEXEC 
| MNT_NODEV 
| 
 904             MNT_SYNCHRONOUS 
| MNT_UNION 
| MNT_ASYNC 
| 
 905             MNT_UNKNOWNPERMISSIONS 
| MNT_DONTBROWSE 
| 
 906             MNT_AUTOMOUNTED 
| MNT_DEFWRITE 
| MNT_NOATIME 
| MNT_STRICTATIME 
| 
 907             MNT_QUARANTINE 
| MNT_CPROTECT
); 
 910         if (flags 
& MNT_MULTILABEL
) { 
 911                 if (vfsp
->vfc_vfsflags 
& VFC_VFSNOMACLABEL
) { 
 915                 mp
->mnt_flag 
|= MNT_MULTILABEL
; 
 919          * Process device path for local file systems if requested. 
 921          * Snapshot and mount-by-role mounts do not use this path; they are 
 922          * passing other opaque data in the device path field. 
 924          * Basesystemroot mounts pass a device path to be resolved here, 
 925          * but it's just a char * already inside the kernel, which 
 926          * kernel_mount() shoved into a user_addr_t to call us. So for such 
 927          * mounts we must skip copyin (both of the address and of the string 
 930         if (vfsp
->vfc_vfsflags 
& VFC_VFSLOCALARGS 
&& 
 931             !(internal_flags 
& (KERNEL_MOUNT_SNAPSHOT 
| KERNEL_MOUNT_VOLBYROLE_MASK
))) { 
 932                 boolean_t do_copyin_devpath 
= true; 
 933 #if CONFIG_BASESYSTEMROOT 
 934                 if (internal_flags 
& KERNEL_MOUNT_BASESYSTEMROOT
) { 
  935                         // KERNEL_MOUNT_BASESYSTEMROOT implies subtle behavior worth noting: 
 936                         // We have been passed fsmountargs, which is typed as a user_addr_t, 
 937                         // but is actually a char ** pointing to a (kernelspace) string. 
 938                         // We manually unpack it with a series of casts and dereferences 
 939                         // that reverses what was done just above us on the stack in 
 940                         // imageboot_pivot_image(). 
 941                         // After retrieving the path to the dev node (which we will NDINIT 
 942                         // in a moment), we pass NULL fsmountargs on to the filesystem. 
 943                         _Static_assert(sizeof(char **) == sizeof(fsmountargs
), "fsmountargs should fit a (kernel) address"); 
 944                         char **devnamepp 
= (char **)fsmountargs
; 
 945                         char *devnamep 
= *devnamepp
; 
 946                         devpath 
= CAST_USER_ADDR_T(devnamep
); 
 947                         do_copyin_devpath 
= false; 
 948                         fsmountargs 
= USER_ADDR_NULL
; 
 950                         //Now that we have a mp, denote that this mount is for the basesystem. 
 951                         mp
->mnt_supl_kern_flag 
|= MNTK_SUPL_BASESYSTEM
; 
 953 #endif // CONFIG_BASESYSTEMROOT 
 955                 if (do_copyin_devpath
) { 
 956                         if (vfs_context_is64bit(ctx
)) { 
 957                                 if ((error 
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) { 
 960                                 fsmountargs 
+= sizeof(devpath
); 
 963                                 if ((error 
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) { 
 966                                 /* munge into LP64 addr */ 
 967                                 devpath 
= CAST_USER_ADDR_T(tmp
); 
 968                                 fsmountargs 
+= sizeof(tmp
); 
 972                 /* Lookup device and authorize access to it */ 
 976                         enum uio_seg seg 
= UIO_USERSPACE
; 
 977 #if CONFIG_BASESYSTEMROOT 
 978                         if (internal_flags 
& KERNEL_MOUNT_BASESYSTEMROOT
) { 
 981 #endif // CONFIG_BASESYSTEMROOT 
 983                         NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW
, seg
, devpath
, ctx
); 
 984                         if ((error 
= namei(&nd
))) { 
 988                         strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
); 
 993                         if (devvp
->v_type 
!= VBLK
) { 
 997                         if (major(devvp
->v_rdev
) >= nblkdev
) { 
1002                          * If mount by non-root, then verify that user has necessary 
1003                          * permissions on the device. 
1005                         if (suser(vfs_context_ucred(ctx
), NULL
) != 0) { 
1006                                 mode_t accessmode 
= KAUTH_VNODE_READ_DATA
; 
1008                                 if ((mp
->mnt_flag 
& MNT_RDONLY
) == 0) { 
1009                                         accessmode 
|= KAUTH_VNODE_WRITE_DATA
; 
1011                                 if ((error 
= vnode_authorize(devvp
, NULL
, accessmode
, ctx
)) != 0) { 
1016                 /* On first mount, preflight and open device */ 
1017                 if (devpath 
&& ((flags 
& MNT_UPDATE
) == 0)) { 
1018                         if ((error 
= vnode_ref(devvp
))) { 
1022                          * Disallow multiple mounts of the same device. 
1023                          * Disallow mounting of a device that is currently in use 
1024                          * (except for root, which might share swap device for miniroot). 
1025                          * Flush out any old buffers remaining from a previous use. 
1027                         if ((error 
= vfs_mountedon(devvp
))) { 
1031                         if (vcount(devvp
) > 1 && !(vfs_flags(mp
) & MNT_ROOTFS
)) { 
1035                         if ((error 
= VNOP_FSYNC(devvp
, MNT_WAIT
, ctx
))) { 
1039                         if ((error 
= buf_invalidateblks(devvp
, BUF_WRITE_DATA
, 0, 0))) { 
1043                         ronly 
= (mp
->mnt_flag 
& MNT_RDONLY
) != 0; 
1045                         error 
= mac_vnode_check_open(ctx
, 
1047                             ronly 
? FREAD 
: FREAD 
| FWRITE
); 
1052                         if ((error 
= VNOP_OPEN(devvp
, ronly 
? FREAD 
: FREAD 
| FWRITE
, ctx
))) { 
1056                         mp
->mnt_devvp 
= devvp
; 
1057                         device_vnode 
= devvp
; 
1058                 } else if ((mp
->mnt_flag 
& MNT_RDONLY
) && 
1059                     (mp
->mnt_kern_flag 
& MNTK_WANTRDWR
) && 
1060                     (device_vnode 
= mp
->mnt_devvp
)) { 
1064                          * If upgrade to read-write by non-root, then verify 
1065                          * that user has necessary permissions on the device. 
1067                         vnode_getalways(device_vnode
); 
1069                         if (suser(vfs_context_ucred(ctx
), NULL
) && 
1070                             (error 
= vnode_authorize(device_vnode
, NULL
, 
1071                             KAUTH_VNODE_READ_DATA 
| KAUTH_VNODE_WRITE_DATA
, 
1073                                 vnode_put(device_vnode
); 
1077                         /* Tell the device that we're upgrading */ 
1078                         dev 
= (dev_t
)device_vnode
->v_rdev
; 
1081                         if ((u_int
)maj 
>= (u_int
)nblkdev
) { 
1082                                 panic("Volume mounted on a device with invalid major number."); 
1085                         error 
= bdevsw
[maj
].d_open(dev
, FREAD 
| FWRITE
, S_IFBLK
, p
); 
1086                         vnode_put(device_vnode
); 
1087                         device_vnode 
= NULLVP
; 
1092         } // localargs && !(snapshot | data | vm) 
1095         if ((flags 
& MNT_UPDATE
) == 0) { 
1096                 mac_mount_label_init(mp
); 
1097                 mac_mount_label_associate(ctx
, mp
); 
1100                 if ((flags 
& MNT_UPDATE
) != 0) { 
1101                         error 
= mac_mount_check_label_update(ctx
, mp
); 
1109          * Mount the filesystem.  We already asserted that internal_flags 
1110          * cannot have more than one mount-by-role bit set. 
1112         if (internal_flags 
& KERNEL_MOUNT_SNAPSHOT
) { 
1113                 error 
= VFS_IOCTL(mp
, VFSIOC_MOUNT_SNAPSHOT
, 
1114                     (caddr_t
)fsmountargs
, 0, ctx
); 
1115         } else if (internal_flags 
& KERNEL_MOUNT_DATAVOL
) { 
1116 #if CONFIG_ROSV_STARTUP 
1117                 struct mount 
*origin_mp 
= (struct mount
*)fsmountargs
; 
1118                 fs_role_mount_args_t frma 
= {origin_mp
, VFS_DATA_ROLE
}; 
1119                 error 
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
); 
1121                         printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_DATA_ROLE
, error
); 
1123                         /* Mark volume associated with system volume */ 
1124                         mp
->mnt_kern_flag 
|= MNTK_SYSTEM
; 
1126                         /* Attempt to acquire the mnt_devvp and set it up */ 
1127                         struct vnode 
*mp_devvp 
= NULL
; 
1128                         if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) { 
1129                                 errno_t lerr 
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
, 
1130                                     0, &mp_devvp
, vfs_context_kernel()); 
1132                                         mp
->mnt_devvp 
= mp_devvp
; 
1133                                         //vnode_lookup took an iocount, need to drop it. 
1134                                         vnode_put(mp_devvp
); 
1135                                         // now set `device_vnode` to the devvp that was acquired. 
1136                                         // this is needed in order to ensure vfs_init_io_attributes is invoked. 
1137                                         // note that though the iocount above was dropped, the mount acquires 
1138                                         // an implicit reference against the device. 
1139                                         device_vnode 
= mp_devvp
; 
1146         } else if (internal_flags 
& KERNEL_MOUNT_VMVOL
) { 
1148                 struct mount 
*origin_mp 
= (struct mount
*)fsmountargs
; 
1149                 fs_role_mount_args_t frma 
= {origin_mp
, VFS_VM_ROLE
}; 
1150                 error 
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
); 
1152                         printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_VM_ROLE
, error
); 
1154                         /* Mark volume associated with system volume and a swap mount */ 
1155                         mp
->mnt_kern_flag 
|= (MNTK_SYSTEM 
| MNTK_SWAP_MOUNT
); 
1156                         /* Attempt to acquire the mnt_devvp and set it up */ 
1157                         struct vnode 
*mp_devvp 
= NULL
; 
1158                         if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) { 
1159                                 errno_t lerr 
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
, 
1160                                     0, &mp_devvp
, vfs_context_kernel()); 
1162                                         mp
->mnt_devvp 
= mp_devvp
; 
1163                                         //vnode_lookup took an iocount, need to drop it. 
1164                                         vnode_put(mp_devvp
); 
1166                                         // now set `device_vnode` to the devvp that was acquired. 
1167                                         // note that though the iocount above was dropped, the mount acquires 
1168                                         // an implicit reference against the device. 
1169                                         device_vnode 
= mp_devvp
; 
1176         } else if ((internal_flags 
& KERNEL_MOUNT_PREBOOTVOL
) || (internal_flags 
& KERNEL_MOUNT_RECOVERYVOL
)) { 
1177 #if CONFIG_MOUNT_PREBOOTRECOVERY 
1178                 struct mount 
*origin_mp 
= (struct mount
*)fsmountargs
; 
1179                 uint32_t mount_role 
= 0; 
1180                 if (internal_flags 
& KERNEL_MOUNT_PREBOOTVOL
) { 
1181                         mount_role 
= VFS_PREBOOT_ROLE
; 
1182                 } else if (internal_flags 
& KERNEL_MOUNT_RECOVERYVOL
) { 
1183                         mount_role 
= VFS_RECOVERY_ROLE
; 
1186                 if (mount_role 
!= 0) { 
1187                         fs_role_mount_args_t frma 
= {origin_mp
, mount_role
}; 
1188                         error 
= VFS_IOCTL(mp
, VFSIOC_MOUNT_BYROLE
, (caddr_t
)&frma
, 0, ctx
); 
1190                                 printf("MOUNT-BY-ROLE (%d) failed! (%d)", mount_role
, error
); 
1192                                 // NOT YET - need to qualify how this interacts with shutdown, ERP/ERB, etc 
1193                                 /* Mark volume associated with system volume */ 
1194                                 //mp->mnt_kern_flag |= MNTK_SYSTEM; 
1195                                 /* Attempt to acquire the mnt_devvp and set it up */ 
1196                                 struct vnode 
*mp_devvp 
= NULL
; 
1197                                 if (mp
->mnt_vfsstat
.f_mntfromname
[0] != 0) { 
1198                                         errno_t lerr 
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
, 
1199                                             0, &mp_devvp
, vfs_context_kernel()); 
1201                                                 mp
->mnt_devvp 
= mp_devvp
; 
1202                                                 //vnode_lookup took an iocount, need to drop it. 
1203                                                 vnode_put(mp_devvp
); 
1205                                                 // now set `device_vnode` to the devvp that was acquired. 
1206                                                 // note that though the iocount above was dropped, the mount acquires 
1207                                                 // an implicit reference against the device. 
1208                                                 device_vnode 
= mp_devvp
; 
1213                         printf("MOUNT-BY-ROLE (%d) failed - ROLE UNRECOGNIZED! (%d)", mount_role
, error
); 
1220                 error 
= VFS_MOUNT(mp
, device_vnode
, fsmountargs
, ctx
); 
1223         if (flags 
& MNT_UPDATE
) { 
1224                 if (mp
->mnt_kern_flag 
& MNTK_WANTRDWR
) { 
1225                         mp
->mnt_flag 
&= ~MNT_RDONLY
; 
1228                     (MNT_UPDATE 
| MNT_RELOAD 
| MNT_FORCE
); 
1229                 mp
->mnt_kern_flag 
&= ~MNTK_WANTRDWR
; 
1231                         mp
->mnt_flag 
= flag
;  /* restore flag value */ 
1233                 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
); 
1234                 lck_rw_done(&mp
->mnt_rwlock
); 
1235                 is_rwlock_locked 
= FALSE
; 
1237                         enablequotas(mp
, ctx
); 
1243          * Put the new filesystem on the mount list after root. 
1246                 struct vfs_attr vfsattr
; 
1248                 error 
= mac_mount_check_mount_late(ctx
, mp
); 
1253                 if (vfs_flags(mp
) & MNT_MULTILABEL
) { 
1254                         error 
= VFS_ROOT(mp
, &rvp
, ctx
); 
1256                                 printf("%s() VFS_ROOT returned %d\n", __func__
, error
); 
1259                         error 
= vnode_label(mp
, NULL
, rvp
, NULL
, 0, ctx
); 
1261                          * drop reference provided by VFS_ROOT 
1271                 vnode_lock_spin(vp
); 
1272                 CLR(vp
->v_flag
, VMOUNT
); 
1273                 vp
->v_mountedhere 
= mp
; 
1277                  * taking the name_cache_lock exclusively will 
1278                  * insure that everyone is out of the fast path who 
1279                  * might be trying to use a now stale copy of 
1280                  * vp->v_mountedhere->mnt_realrootvp 
1281                  * bumping mount_generation causes the cached values 
1286                 name_cache_unlock(); 
1288                 error 
= vnode_ref(vp
); 
1293                 have_usecount 
= TRUE
; 
1295                 error 
= checkdirs(vp
, ctx
); 
1297                         /* Unmount the filesystem as cdir/rdirs cannot be updated */ 
1301                  * there is no cleanup code here so I have made it void 
1302                  * we need to revisit this 
1304                 (void)VFS_START(mp
, 0, ctx
); 
1306                 if (mount_list_add(mp
) != 0) { 
1308                          * The system is shutting down trying to umount 
1309                          * everything, so fail with a plausible errno. 
1314                 lck_rw_done(&mp
->mnt_rwlock
); 
1315                 is_rwlock_locked 
= FALSE
; 
1317                 /* Check if this mounted file system supports EAs or named streams. */ 
1318                 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */ 
1319                 VFSATTR_INIT(&vfsattr
); 
1320                 VFSATTR_WANTED(&vfsattr
, f_capabilities
); 
1321                 if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "webdav", sizeof("webdav")) != 0 && 
1322                     vfs_getattr(mp
, &vfsattr
, ctx
) == 0 && 
1323                     VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) { 
1324                         if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) && 
1325                             (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) { 
1326                                 mp
->mnt_kern_flag 
|= MNTK_EXTENDED_ATTRS
; 
1329                         if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) && 
1330                             (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) { 
1331                                 mp
->mnt_kern_flag 
|= MNTK_NAMED_STREAMS
; 
1334                         /* Check if this file system supports path from id lookups. */ 
1335                         if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) && 
1336                             (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) { 
1337                                 mp
->mnt_kern_flag 
|= MNTK_PATH_FROM_ID
; 
1338                         } else if (mp
->mnt_flag 
& MNT_DOVOLFS
) { 
1339                                 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */ 
1340                                 mp
->mnt_kern_flag 
|= MNTK_PATH_FROM_ID
; 
1343                         if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
) && 
1344                             (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_DIR_HARDLINKS
)) { 
1345                                 mp
->mnt_kern_flag 
|= MNTK_DIR_HARDLINKS
; 
1348                 if (mp
->mnt_vtable
->vfc_vfsflags 
& VFC_VFSNATIVEXATTR
) { 
1349                         mp
->mnt_kern_flag 
|= MNTK_EXTENDED_ATTRS
; 
1351                 if (mp
->mnt_vtable
->vfc_vfsflags 
& VFC_VFSPREFLIGHT
) { 
1352                         mp
->mnt_kern_flag 
|= MNTK_UNMOUNT_PREFLIGHT
; 
1354                 /* increment the operations count */ 
1355                 OSAddAtomic(1, &vfs_nummntops
); 
1356                 enablequotas(mp
, ctx
); 
1359                         device_vnode
->v_specflags 
|= SI_MOUNTEDON
; 
1362                          *   cache the IO attributes for the underlying physical media... 
1363                          *   an error return indicates the underlying driver doesn't 
1364                          *   support all the queries necessary... however, reasonable 
1365                          *   defaults will have been set, so no reason to bail or care 
1367                         vfs_init_io_attributes(device_vnode
, mp
); 
1370                 /* Now that mount is setup, notify the listeners */ 
1371                 vfs_notify_mount(pvp
); 
1372                 IOBSDMountChange(mp
, kIOMountChangeMount
); 
1374                 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */ 
1375                 if (mp
->mnt_vnodelist
.tqh_first 
!= NULL
) { 
1376                         panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.", 
1377                             mp
->mnt_vtable
->vfc_name
, error
); 
1380                 vnode_lock_spin(vp
); 
1381                 CLR(vp
->v_flag
, VMOUNT
); 
1384                 mp
->mnt_vtable
->vfc_refcount
--; 
1385                 mount_list_unlock(); 
1388                         vnode_rele(device_vnode
); 
1389                         VNOP_CLOSE(device_vnode
, ronly 
? FREAD 
: FREAD 
| FWRITE
, ctx
); 
1391                 lck_rw_done(&mp
->mnt_rwlock
); 
1392                 is_rwlock_locked 
= FALSE
; 
1395                  * if we get here, we have a mount structure that needs to be freed, 
1396                  * but since the coveredvp hasn't yet been updated to point at it, 
1397                  * no need to worry about other threads holding a crossref on this mp 
1398                  * so it's ok to just free it 
1400                 mount_lock_destroy(mp
); 
1402                 mac_mount_label_destroy(mp
); 
1404                 zfree(mount_zone
, mp
); 
1405                 did_set_lmount 
= false; 
1409          * drop I/O count on the device vp if there was one 
1411         if (devpath 
&& devvp
) { 
1415         if (did_set_lmount
) { 
1416                 mount_lock_spin(mp
); 
1417                 mp
->mnt_lflag 
&= ~MNT_LMOUNT
; 
1423 /* Error condition exits */ 
1425         (void)VFS_UNMOUNT(mp
, MNT_FORCE
, ctx
); 
1428          * If the mount has been placed on the covered vp, 
1429          * it may have been discovered by now, so we have 
1430          * to treat this just like an unmount 
1432         mount_lock_spin(mp
); 
1433         mp
->mnt_lflag 
|= MNT_LDEAD
; 
1436         if (device_vnode 
!= NULLVP
) { 
1437                 vnode_rele(device_vnode
); 
1438                 VNOP_CLOSE(device_vnode
, mp
->mnt_flag 
& MNT_RDONLY 
? FREAD 
: FREAD 
| FWRITE
, 
1443         vnode_lock_spin(vp
); 
1446         vp
->v_mountedhere 
= (mount_t
) 0; 
1450         if (have_usecount
) { 
1454         if (devpath 
&& ((flags 
& MNT_UPDATE
) == 0) && (!did_rele
)) { 
1458         if (devpath 
&& devvp
) { 
1462         /* Release mnt_rwlock only when it was taken */ 
1463         if (is_rwlock_locked 
== TRUE
) { 
1465                         mp
->mnt_flag 
= flag
;  /* restore mnt_flag value */ 
1467                 lck_rw_done(&mp
->mnt_rwlock
); 
1470         if (did_set_lmount
) { 
1471                 mount_lock_spin(mp
); 
1472                 mp
->mnt_lflag 
&= ~MNT_LMOUNT
; 
1477                 if (mp
->mnt_crossref
) { 
1478                         mount_dropcrossref(mp
, vp
, 0); 
1480                         mount_lock_destroy(mp
); 
1482                         mac_mount_label_destroy(mp
); 
1484                         zfree(mount_zone
, mp
); 
1489                 vfsp
->vfc_refcount
--; 
1490                 mount_list_unlock(); 
1497  * Flush in-core data, check for competing mount attempts, 
/*
 * prepare_coveredvp: vet the vnode that is about to be covered by a mount
 * and flag it as having a mount in progress.
 *
 * Parameters:
 *   vp        - candidate covered vnode.
 *   ctx       - caller's VFS context (credential source for the checks).
 *   cnp       - component name of the mount point.
 *   fsname    - filesystem type name.
 *   skip_auth - NOTE(review): the statement that consumes this flag is not
 *               present in this listing; presumably it bypasses the
 *               ownership check below -- confirm against the full source.
 *
 * Visible steps, in order:
 *   1. Request va_uid via vnode_getattr() and compare it with the uid of the
 *      caller's credential; a superuser context is exempt from the compare.
 *   2. VNOP_FSYNC(vp, MNT_WAIT) followed by
 *      buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0).
 *   3. Test that vp is a directory (v_type != VDIR is the failing case).
 *   4. Test for VMOUNT set together with a non-NULL v_mountedhere.
 *   5. mac_mount_check_mount(ctx, vp, ...) -- trailing arguments are cut off
 *      in this listing.
 *   6. Set VMOUNT in vp->v_flag after taking vnode_lock_spin(vp).
 *
 * NOTE(review): the bodies of the failure branches (error values, jumps) and
 * the final return are missing from this listing; do not infer them from
 * here.
 */
1501 prepare_coveredvp(vnode_t vp
, vfs_context_t ctx
, struct componentname 
*cnp
, const char *fsname
, boolean_t skip_auth
) 
1504 #pragma unused(cnp,fsname) 
1506         struct vnode_attr va
; 
1511                  * If the user is not root, ensure that they own the directory 
1512                  * onto which we are attempting to mount. 
1515                 VATTR_WANTED(&va
, va_uid
); 
1516                 if ((error 
= vnode_getattr(vp
, &va
, ctx
)) || 
1517                     (va
.va_uid 
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) && 
1518                     (!vfs_context_issuser(ctx
)))) { 
        /* Flush vp (VNOP_FSYNC with MNT_WAIT), then invalidate its buffers. */
1524         if ((error 
= VNOP_FSYNC(vp
, MNT_WAIT
, ctx
))) { 
1528         if ((error 
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0))) { 
        /* Only directories pass this test. */
1532         if (vp
->v_type 
!= VDIR
) { 
        /* Detect a vnode that is flagged VMOUNT and already has a mount on it. */
1537         if (ISSET(vp
->v_flag
, VMOUNT
) && (vp
->v_mountedhere 
!= NULL
)) { 
1543         error 
= mac_mount_check_mount(ctx
, vp
, 
        /* Flag the vnode as "mount in progress" while holding its spin lock. */
1550         vnode_lock_spin(vp
); 
1551         SET(vp
->v_flag
, VMOUNT
); 
/*
 * Debug logging for the imgsrc (imageboot source relocation) code below.
 * Two definitions of IMGSRC_DEBUG are visible: one forwards its arguments to
 * printf() with an "imgsrc: " prefix, the other expands to an empty
 * do/while(0) statement.
 * NOTE(review): the preprocessor conditional that picks between them is not
 * present in this listing; presumably it tests DEBUG_IMGSRC -- confirm.
 */
1558 #if CONFIG_IMGSRC_ACCESS 
1560 #define DEBUG_IMGSRC 0 
1563 #define IMGSRC_DEBUG(args...) printf("imgsrc: " args) 
1565 #define IMGSRC_DEBUG(args...) do { } while(0) 
/*
 * authorize_devpath_and_update_mntfromname: check that a caller-supplied
 * device path names the block device backing mount mp, record that path as
 * the mount's f_mntfromname, and authorize the caller against the device.
 *
 * Parameters:
 *   mp      - mount whose mnt_devvp is compared against the looked-up device.
 *   devpath - path of the device node; looked up with namei().
 *   devvpp  - out parameter. NOTE(review): the store through it is not
 *             present in this listing.
 *   ctx     - caller's VFS context.
 *
 * Visible steps, in order:
 *   1. The lookup segment defaults to UIO_USERSPACE; there is a special case
 *      when ctx is the kernel context (its body is not visible -- presumably
 *      it switches to a kernel-space segment; confirm).
 *   2. namei() lookup (LOOKUP / OP_LOOKUP / FOLLOW) of devpath.
 *   3. Test vnode_isblk() on the result.
 *   4. Fetch mp->mnt_devvp, test for NULLVP, take an iocount on it with
 *      vnode_getwithref().
 *   5. Compare vnode_specrdev() of the looked-up vnode and of mnt_devvp.
 *   6. Copy the looked-up path buffer (nd.ni_cnd.cn_pnbuf) into
 *      mp->mnt_vfsstat.f_mntfromname with strlcpy(..., MAXPATHLEN).
 *   7. For non-superuser callers, vnode_authorize() for
 *      KAUTH_VNODE_READ_DATA, plus KAUTH_VNODE_WRITE_DATA when the mount is
 *      not MNT_RDONLY.
 *   8. vnode_put(realdevvp) drops the iocount taken in step 4.
 *
 * NOTE(review): the error values, jump targets, the assignment of vp and the
 * remaining cleanup are missing from this listing.
 */
1569 authorize_devpath_and_update_mntfromname(mount_t mp
, user_addr_t devpath
, vnode_t 
*devvpp
, vfs_context_t ctx
) 
1571         struct nameidata nd
; 
1572         vnode_t vp
, realdevvp
; 
1575         enum uio_seg uio 
= UIO_USERSPACE
; 
1577         if (ctx 
== vfs_context_kernel()) { 
1581         NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW
, uio
, devpath
, ctx
); 
1582         if ((error 
= namei(&nd
))) { 
1583                 IMGSRC_DEBUG("namei() failed with %d\n", error
); 
        /* The supplied path must name a block device. */
1589         if (!vnode_isblk(vp
)) { 
1590                 IMGSRC_DEBUG("Not block device.\n"); 
        /* Compare against the device vnode the mount already records. */
1595         realdevvp 
= mp
->mnt_devvp
; 
1596         if (realdevvp 
== NULLVP
) { 
1597                 IMGSRC_DEBUG("No device backs the mount.\n"); 
1602         error 
= vnode_getwithref(realdevvp
); 
1604                 IMGSRC_DEBUG("Coudn't get iocount on device.\n"); 
1608         if (vnode_specrdev(vp
) != vnode_specrdev(realdevvp
)) { 
1609                 IMGSRC_DEBUG("Wrong dev_t.\n"); 
        /* Record the path that was looked up as the mount-from name. */
1614         strlcpy(mp
->mnt_vfsstat
.f_mntfromname
, nd
.ni_cnd
.cn_pnbuf
, MAXPATHLEN
); 
1617          * If mount by non-root, then verify that user has necessary 
1618          * permissions on the device. 
1620         if (!vfs_context_issuser(ctx
)) { 
1621                 accessmode 
= KAUTH_VNODE_READ_DATA
; 
1622                 if ((mp
->mnt_flag 
& MNT_RDONLY
) == 0) { 
1623                         accessmode 
|= KAUTH_VNODE_WRITE_DATA
; 
1625                 if ((error 
= vnode_authorize(vp
, NULL
, accessmode
, ctx
)) != 0) { 
1626                         IMGSRC_DEBUG("Access denied.\n"); 
1634         vnode_put(realdevvp
); 
1647  * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode, 
1648  * and call checkdirs() 
/*
 * place_mount_and_checkdirs: attach mount mp on top of covered vnode vp.
 *
 * Parameters:
 *   mp  - mount being placed.
 *   vp  - vnode to cover.
 *   ctx - VFS context handed to checkdirs().
 *
 * Visible steps, in order:
 *   1. mp->mnt_vnodecovered = vp.
 *   2. After vnode_lock_spin(vp): clear VMOUNT and set vp->v_mountedhere = mp.
 *   3. A name-cache critical section ending in name_cache_unlock(); the lock
 *      acquisition and the work done inside it are not present in this
 *      listing (the surviving comment text mentions bumping
 *      mount_generation).
 *   4. vnode_ref(vp), then checkdirs(vp, ctx).
 *   5. On some path mp->mnt_vnodecovered is reset to NULLVP.
 *      NOTE(review): the condition guarding that reset, and the rest of the
 *      failure handling, are missing from this listing.
 */
1651 place_mount_and_checkdirs(mount_t mp
, vnode_t vp
, vfs_context_t ctx
) 
1655         mp
->mnt_vnodecovered 
= vp
; /* XXX This is normally only set at init-time ... */ 
1657         IMGSRC_DEBUG("placing: fsname = %s, vp = %s\n", 
1658             mp
->mnt_vtable
->vfc_name
, vnode_getname(vp
)); 
        /* Swap the "mount in progress" flag for the real mounted-here pointer. */
1660         vnode_lock_spin(vp
); 
1661         CLR(vp
->v_flag
, VMOUNT
); 
1662         vp
->v_mountedhere 
= mp
; 
1666          * taking the name_cache_lock exclusively will 
1667          * insure that everyone is out of the fast path who 
1668          * might be trying to use a now stale copy of 
1669          * vp->v_mountedhere->mnt_realrootvp 
1670          * bumping mount_generation causes the cached values 
1675         name_cache_unlock(); 
1677         error 
= vnode_ref(vp
); 
1682         error 
= checkdirs(vp
, ctx
); 
1684                 /* Unmount the filesystem as cdir/rdirs cannot be updated */ 
1691                 mp
->mnt_vnodecovered 
= NULLVP
; 
/*
 * undo_place_on_covered_vp: detach mount mp from covered vnode vp.
 *
 * Visible effects: after vnode_lock_spin(vp), vp->v_mountedhere is set to
 * NULL; mp->mnt_vnodecovered is set to NULLVP.
 *
 * NOTE(review): several lines of this function are missing from this listing
 * (for example the matching unlock). A comment at the call site describes
 * this helper as also releasing the vp and clearing VMOUNT -- confirm against
 * the full source.
 */
1697 undo_place_on_covered_vp(mount_t mp
, vnode_t vp
) 
1700         vnode_lock_spin(vp
); 
1701         vp
->v_mountedhere 
= (mount_t
)NULL
; 
1704         mp
->mnt_vnodecovered 
= NULLVP
; 
/*
 * mount_begin_update: start an update of an existing mount by taking its
 * rwlock exclusively and authorizing the caller.
 *
 * Parameters:
 *   mp    - mount to update.
 *   ctx   - caller's VFS context (credential source).
 *   flags - mount flags; only MNT_RELOAD is examined in the visible code.
 *
 * Visible steps, in order:
 *   1. Under mount_lock_spin(mp), test mnt_lflag for MNT_LUNMOUNT or
 *      MNT_LMOUNT.
 *   2. lck_rw_lock_exclusive(&mp->mnt_rwlock).
 *   3. Test for MNT_RELOAD requested on a mount that is not MNT_RDONLY.
 *   4. Test that the caller's uid matches mnt_vfsstat.f_owner, unless the
 *      context is superuser.
 *   5. mac_mount_check_remount(ctx, mp).
 *   6. lck_rw_done(&mp->mnt_rwlock) on a nested path.
 *      NOTE(review): presumably the error path, so that the lock is held on
 *      return only when the call succeeds -- the guarding condition and all
 *      error values are missing from this listing; confirm.
 *
 * The visible counterpart that releases the lock is mount_end_update().
 */
1708 mount_begin_update(mount_t mp
, vfs_context_t ctx
, int flags
) 
1712         /* unmount in progress return error */ 
1713         mount_lock_spin(mp
); 
1714         if (mp
->mnt_lflag 
& (MNT_LUNMOUNT 
| MNT_LMOUNT
)) { 
1719         lck_rw_lock_exclusive(&mp
->mnt_rwlock
); 
1722          * We only allow the filesystem to be reloaded if it 
1723          * is currently mounted read-only. 
1725         if ((flags 
& MNT_RELOAD
) && 
1726             ((mp
->mnt_flag 
& MNT_RDONLY
) == 0)) { 
1732          * Only root, or the user that did the original mount is 
1733          * permitted to update it. 
1735         if (mp
->mnt_vfsstat
.f_owner 
!= kauth_cred_getuid(vfs_context_ucred(ctx
)) && 
1736             (!vfs_context_issuser(ctx
))) { 
1741         error 
= mac_mount_check_remount(ctx
, mp
); 
1749                 lck_rw_done(&mp
->mnt_rwlock
); 
/*
 * mount_end_update: release mp->mnt_rwlock with lck_rw_done(). This is the
 * counterpart of mount_begin_update(), which takes that lock exclusively.
 */
1756 mount_end_update(mount_t mp
) 
1758         lck_rw_done(&mp
->mnt_rwlock
); 
/*
 * get_imgsrc_rootvnode: look up the saved imageboot source root vnode for a
 * given nesting level.
 *
 * Parameters:
 *   height - index into imgsrc_rootvnodes[]; values at or above
 *            MAX_IMAGEBOOT_NESTING take the first (rejecting) branch.
 *   rvpp   - out parameter. NOTE(review): the store through it is not
 *            present in this listing; presumably it receives the vnode when
 *            vnode_get() succeeds -- confirm.
 *
 * The visible success test requires both a non-NULLVP table entry and
 * vnode_get(vp) == 0, i.e. an iocount is taken on the vnode before it is
 * handed back. Return values are missing from this listing.
 */
1762 get_imgsrc_rootvnode(uint32_t height
, vnode_t 
*rvpp
) 
1766         if (height 
>= MAX_IMAGEBOOT_NESTING
) { 
1770         vp 
= imgsrc_rootvnodes
[height
]; 
1771         if ((vp 
!= NULLVP
) && (vnode_get(vp
) == 0)) { 
/*
 * relocate_imageboot_source: move the filesystem that was the imageboot
 * source onto a new covered vnode.
 *
 * Parameters:
 *   pvp         - parent of the new mount point; passed to
 *                 vfs_notify_mount() on success.
 *   vp          - vnode to be covered.
 *   cnp         - component name of the mount point; cn_pnbuf becomes the
 *                 new f_mntonname.
 *   fsname      - filesystem type name supplied by the caller; must match the
 *                 mount's vfc_name (strncmp over MFSNAMELEN).
 *   ctx         - caller's VFS context; must be superuser.
 *   is64bit     - selects between the 64-bit and 32-bit argument layouts
 *                 (the selecting conditionals are not present in this
 *                 listing).
 *   fsmountargs - address of the argument block read with copyin().
 *   by_index    - NOTE(review): presumably selects the mnt_imgsrc_args form
 *                 (height/flags/devpath) over the legacy bare-devpath form
 *                 flagged "For binary compatibility" -- the conditional is
 *                 not visible; confirm.
 *
 * Visible sequence:
 *   1. Bail-out tests: imgsrc_rootvnodes[0] == NULLVP, caller not superuser.
 *   2. copyin() of user64_mnt_imgsrc_args or user32_mnt_imgsrc_args to obtain
 *      mi_height, mi_flags and mi_devpath; or, on the compatibility path,
 *      copyin() of just the device path pointer (widened with
 *      CAST_USER_ADDR_T for the 32-bit case).
 *   3. get_imgsrc_rootvnode(height, &rvp); allocate old_mntonname from
 *      ZV_NAMEI.
 *   4. mp = vnode_mount(rvp); test MNTK_HAS_MOVED; mount_begin_update();
 *      test MNTK_HAS_MOVED again now that the rwlock is held.
 *   5. prepare_coveredvp(vp, ctx, cnp, fsname, FALSE).
 *   6. Filesystem name check; for VFC_VFSLOCALARGS filesystems with a
 *      non-NULL devpath, authorize_devpath_and_update_mntfromname().
 *   7. place_mount_and_checkdirs(); save the old f_mntonname and install the
 *      new one; set MNTK_HAS_MOVED; mount_list_add().
 *   8. Success tail: mount_end_update(), free old_mntonname,
 *      vfs_notify_mount(pvp).
 *   9. Failure tail: restore f_mntonname, clear MNTK_HAS_MOVED, then either
 *      undo_place_on_covered_vp() or just clear VMOUNT (depending on whether
 *      the mount had been placed -- see `placed`), mount_end_update(), free
 *      old_mntonname.
 *
 * NOTE(review): labels, jumps, error values, several declarations and the
 * vnode iocount releases are missing from this listing.
 */
1780 relocate_imageboot_source(vnode_t pvp
, vnode_t vp
, 
1781     struct componentname 
*cnp
, const char *fsname
, vfs_context_t ctx
, 
1782     boolean_t is64bit
, user_addr_t fsmountargs
, boolean_t by_index
) 
1786         boolean_t placed 
= FALSE
; 
1787         struct vfstable 
*vfsp
; 
1788         user_addr_t devpath
; 
1789         char *old_mntonname
; 
1795         /* If we didn't imageboot, nothing to move */ 
1796         if (imgsrc_rootvnodes
[0] == NULLVP
) { 
1800         /* Only root can do this */ 
1801         if (!vfs_context_issuser(ctx
)) { 
1805         IMGSRC_DEBUG("looking for root vnode.\n"); 
1808          * Get root vnode of filesystem we're moving. 
1812                         struct user64_mnt_imgsrc_args mia64
; 
1813                         error 
= copyin(fsmountargs
, &mia64
, sizeof(mia64
)); 
1815                                 IMGSRC_DEBUG("Failed to copy in arguments.\n"); 
1819                         height 
= mia64
.mi_height
; 
1820                         flags 
= mia64
.mi_flags
; 
1821                         devpath 
= (user_addr_t
)mia64
.mi_devpath
; 
1823                         struct user32_mnt_imgsrc_args mia32
; 
1824                         error 
= copyin(fsmountargs
, &mia32
, sizeof(mia32
)); 
1826                                 IMGSRC_DEBUG("Failed to copy in arguments.\n"); 
1830                         height 
= mia32
.mi_height
; 
1831                         flags 
= mia32
.mi_flags
; 
1832                         devpath 
= mia32
.mi_devpath
; 
1836                  * For binary compatibility--assumes one level of nesting. 
1839                         if ((error 
= copyin(fsmountargs
, (caddr_t
)&devpath
, sizeof(devpath
)))) { 
1844                         if ((error 
= copyin(fsmountargs
, (caddr_t
)&tmp
, sizeof(tmp
)))) { 
1848                         /* munge into LP64 addr */ 
1849                         devpath 
= CAST_USER_ADDR_T(tmp
); 
1857                 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__
); 
1861         error 
= get_imgsrc_rootvnode(height
, &rvp
); 
1863                 IMGSRC_DEBUG("getting old root vnode failed with %d\n", error
); 
1867         IMGSRC_DEBUG("got old root vnode\n"); 
        /* Scratch buffer that holds the previous f_mntonname for rollback. */
1869         old_mntonname 
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
); 
1871         /* Can only move once */ 
1872         mp 
= vnode_mount(rvp
); 
1873         if ((mp
->mnt_kern_flag 
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) { 
1874                 IMGSRC_DEBUG("Already moved.\n"); 
1879         IMGSRC_DEBUG("moving rvp: fsname = %s\n", mp
->mnt_vtable
->vfc_name
); 
1880         IMGSRC_DEBUG("Starting updated.\n"); 
1882         /* Get exclusive rwlock on mount, authorize update on mp */ 
1883         error 
= mount_begin_update(mp
, ctx
, 0); 
1885                 IMGSRC_DEBUG("Starting updated failed with %d\n", error
); 
1890          * It can only be moved once.  Flag is set under the rwlock, 
1891          * so we're now safe to proceed. 
1893         if ((mp
->mnt_kern_flag 
& MNTK_HAS_MOVED
) == MNTK_HAS_MOVED
) { 
1894                 IMGSRC_DEBUG("Already moved [2]\n"); 
1898         IMGSRC_DEBUG("Preparing coveredvp.\n"); 
1900         /* Mark covered vnode as mount in progress, authorize placing mount on top */ 
1901         error 
= prepare_coveredvp(vp
, ctx
, cnp
, fsname
, FALSE
); 
1903                 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error
); 
1907         IMGSRC_DEBUG("Covered vp OK.\n"); 
1909         /* Sanity check the name caller has provided */ 
1910         vfsp 
= mp
->mnt_vtable
; 
1911         if (strncmp(vfsp
->vfc_name
, fsname
, MFSNAMELEN
) != 0) { 
1912                 IMGSRC_DEBUG("Wrong fs name: actual = %s, expected = %s\n", 
1913                     vfsp
->vfc_name
, fsname
); 
1918         /* Check the device vnode and update mount-from name, for local filesystems */ 
1919         if (vfsp
->vfc_vfsflags 
& VFC_VFSLOCALARGS
) { 
1920                 IMGSRC_DEBUG("Local, doing device validation.\n"); 
1922                 if (devpath 
!= USER_ADDR_NULL
) { 
1923                         error 
= authorize_devpath_and_update_mntfromname(mp
, devpath
, &devvp
, ctx
); 
1925                                 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n"); 
1934          * Place mp on top of vnode, ref the vnode,  call checkdirs(), 
1935          * and increment the name cache's mount generation 
1938         IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n"); 
1939         error 
= place_mount_and_checkdirs(mp
, vp
, ctx
); 
        /* Remember the old mount-on name, then install the new one. */
1946         strlcpy(old_mntonname
, mp
->mnt_vfsstat
.f_mntonname
, MAXPATHLEN
); 
1947         strlcpy(mp
->mnt_vfsstat
.f_mntonname
, cnp
->cn_pnbuf
, MAXPATHLEN
); 
1949         /* Forbid future moves */ 
1951         mp
->mnt_kern_flag 
|= MNTK_HAS_MOVED
; 
1954         /* Finally, add to mount list, completely ready to go */ 
1955         if (mount_list_add(mp
) != 0) { 
1957                  * The system is shutting down trying to umount 
1958                  * everything, so fail with a plausible errno. 
1964         mount_end_update(mp
); 
1966         zfree(ZV_NAMEI
, old_mntonname
); 
1968         vfs_notify_mount(pvp
); 
        /* Rollback: put the previous mount-on name back and allow a later move. */
1972         strlcpy(mp
->mnt_vfsstat
.f_mntonname
, old_mntonname
, MAXPATHLEN
); 
1975         mp
->mnt_kern_flag 
&= ~(MNTK_HAS_MOVED
); 
1980          * Placing the mp on the vnode clears VMOUNT, 
1981          * so cleanup is different after that point 
1984                 /* Rele the vp, clear VMOUNT and v_mountedhere */ 
1985                 undo_place_on_covered_vp(mp
, vp
); 
1987                 vnode_lock_spin(vp
); 
1988                 CLR(vp
->v_flag
, VMOUNT
); 
1992         mount_end_update(mp
); 
1996         zfree(ZV_NAMEI
, old_mntonname
); 
2000 #if CONFIG_LOCKERBOOT 
/*
 * mount_locker_protoboot: kernel-internal wrapper that relocates the
 * imageboot source onto mntpoint using relocate_imageboot_source().
 *
 * Parameters:
 *   fsname    - filesystem type name, forwarded unchanged.
 *   mntpoint  - kernel-space path of the mount point (looked up with
 *               UIO_SYSSPACE).
 *   pbdevpath - kernel-space device path, placed in mia64.mi_devpath.
 *
 * Visible behaviour:
 *   - Runs in vfs_context_kernel() and always uses the 64-bit, by-index
 *     argument form (is64 = TRUE, by_index = TRUE); the argument block is a
 *     local user64_mnt_imgsrc_args whose address is passed as a user_addr_t.
 *     Initializers other than mi_devpath are not present in this listing.
 *   - Looks up mntpoint with FOLLOW | AUDITVNPATH1 | WANTPARENT, so both
 *     nd.ni_vp and nd.ni_dvp are obtained and handed to
 *     relocate_imageboot_source().
 *   - During cleanup the current error is saved in `stashed`, then
 *     vnode_put() is called on nd.ni_vp and nd.ni_dvp; a non-zero return
 *     from either vnode_put() panics.
 *
 * NOTE(review): the namei() call itself, the use of cleanup_nd, and the
 * return statement are missing from this listing.
 */
2003 mount_locker_protoboot(const char *fsname
, const char *mntpoint
, 
2004     const char *pbdevpath
) 
2007         struct nameidata nd
; 
2008         boolean_t cleanup_nd 
= FALSE
; 
2009         vfs_context_t ctx 
= vfs_context_kernel(); 
2010         boolean_t is64 
= TRUE
; 
2011         boolean_t by_index 
= TRUE
; 
2012         struct user64_mnt_imgsrc_args mia64 
= { 
2015                 .mi_devpath 
= CAST_USER_ADDR_T(pbdevpath
), 
2017         user_addr_t mia64addr 
= CAST_USER_ADDR_T(&mia64
); 
2019         NDINIT(&nd
, LOOKUP
, OP_MOUNT
, FOLLOW 
| AUDITVNPATH1 
| WANTPARENT
, 
2020             UIO_SYSSPACE
, CAST_USER_ADDR_T(mntpoint
), ctx
); 
2023                 IMGSRC_DEBUG("namei: %d\n", error
); 
2028         error 
= relocate_imageboot_source(nd
.ni_dvp
, nd
.ni_vp
, 
2029             &nd
.ni_cnd
, fsname
, ctx
, is64
, mia64addr
, by_index
); 
        /* Keep the relocation result; `error` is reused for the vnode_put() calls. */
2033                 int stashed 
= error
; 
2035                 error 
= vnode_put(nd
.ni_vp
); 
2037                         panic("vnode_put() returned non-zero: %d", error
); 
2041                         error 
= vnode_put(nd
.ni_dvp
); 
2043                                 panic("vnode_put() returned non-zero: %d", error
); 
2052 #endif /* CONFIG_LOCKERBOOT */ 
2053 #endif /* CONFIG_IMGSRC_ACCESS */ 
/*
 * enablequotas: best-effort activation of disk quotas on a freshly mounted
 * (or updated) filesystem.
 *
 * Parameters:
 *   mp  - the mount; f_fstypename and f_mntonname are read from
 *         mp->mnt_vfsstat.
 *   ctx - VFS context used for the lookups and the quotactl call.
 *
 * Visible behaviour:
 *   - The filesystem type name is compared with "hfs". NOTE(review): the body
 *     of the mismatch branch is not present in this listing; presumably the
 *     function returns early for non-HFS mounts -- confirm.
 *   - For each quota type below MAXQUOTAS:
 *       1. Build "<mntonname>/<QUOTAOPSNAME>.<extension>" and look it up with
 *          namei(); if the lookup fails, skip this type.
 *       2. Drop the iocount on the found vnode with vnode_put().
 *       3. Build "<mntonname>/<QUOTAFILENAME>.<extension>" and call
 *          VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx).
 *   - The VFS_QUOTACTL result is explicitly discarded, so failures here do
 *     not affect the mount.
 */
2056 enablequotas(struct mount 
*mp
, vfs_context_t ctx
) 
2058         struct nameidata qnd
; 
2060         char qfpath
[MAXPATHLEN
]; 
2061         const char *qfname 
= QUOTAFILENAME
; 
2062         const char *qfopsname 
= QUOTAOPSNAME
; 
2063         const char *qfextension
[] = INITQFNAMES
; 
2065         /* XXX Should be an MNTK_ flag, instead of strncmp()'s */ 
2066         if (strncmp(mp
->mnt_vfsstat
.f_fstypename
, "hfs", sizeof("hfs")) != 0) { 
2070          * Enable filesystem disk quotas if necessary. 
2071          * We ignore errors as this should not interfere with final mount 
2073         for (type 
= 0; type 
< MAXQUOTAS
; type
++) { 
                /* Probe for the per-type quota trigger ("ops") file under the mount point. */
2074                 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfopsname
, qfextension
[type
]); 
2075                 NDINIT(&qnd
, LOOKUP
, OP_MOUNT
, FOLLOW
, UIO_SYSSPACE
, 
2076                     CAST_USER_ADDR_T(qfpath
), ctx
); 
2077                 if (namei(&qnd
) != 0) { 
2078                         continue;           /* option file to trigger quotas is not present */ 
2080                 vnode_put(qnd
.ni_vp
); 
                /* Trigger file exists: build the quota data file path and turn quotas on. */
2082                 snprintf(qfpath
, sizeof(qfpath
), "%s/%s.%s", mp
->mnt_vfsstat
.f_mntonname
, qfname
, qfextension
[type
]); 
2084                 (void) VFS_QUOTACTL(mp
, QCMD(Q_QUOTAON
, type
), 0, qfpath
, ctx
); 
/*
 * checkdirs_callback: per-process callback that checkdirs() hands to
 * proc_iterate().  arg is a struct cdirargs carrying olddp (the directory
 * being replaced) and newdp (its replacement).  When the process has olddp
 * as fd_cdir or fd_rdir, that filedesc field is switched to newdp.
 *
 * Two references on newdp are taken up front with vnode_ref().  At the end,
 * whichever of old_cvp, old_rvp, new_cvp and new_rvp is non-NULL is released
 * with vnode_rele().  Every visible return is PROC_RETURNED.
 *
 * NOTE(review): the lines that take and drop the proc_fdlock and that
 * update old_cvp/old_rvp/new_cvp/new_rvp are not visible in this view.
 */
2091 checkdirs_callback(proc_t p
, void * arg
) 
2093         struct cdirargs 
* cdrp 
= (struct cdirargs 
*)arg
; 
2094         vnode_t olddp 
= cdrp
->olddp
; 
2095         vnode_t newdp 
= cdrp
->newdp
; 
2096         struct filedesc 
*fdp
; 
2097         vnode_t new_cvp 
= newdp
; 
2098         vnode_t new_rvp 
= newdp
; 
2099         vnode_t old_cvp 
= NULL
; 
2100         vnode_t old_rvp 
= NULL
; 
2103          * XXX Also needs to iterate each thread in the process to see if it 
2104          * XXX is using a per-thread current working directory, and, if so, 
2105          * XXX update that as well. 
2109          * First, with the proc_fdlock held, check to see if we will need 
2110          * to do any work.  If not, we will get out fast. 
2115             (fdp
->fd_cdir 
!= olddp 
&& fdp
->fd_rdir 
!= olddp
)) { 
2117                 return PROC_RETURNED
; 
2122          * Ok, we will have to do some work.  Always take two refs 
2123          * because we might need that many.  We'll dispose of whatever 
2124          * we ended up not using. 
2126         if (vnode_ref(newdp
) != 0) { 
2127                 return PROC_RETURNED
; 
2129         if (vnode_ref(newdp
) != 0) { 
2131                 return PROC_RETURNED
; 
2134         proc_dirs_lock_exclusive(p
); 
2136          * Now do the work.  Note: we dropped the proc_fdlock, so we 
2137          * have to do all of the checks again. 
2142                 if (fdp
->fd_cdir 
== olddp
) { 
2144                         fdp
->fd_cdir 
= newdp
; 
2147                 if (fdp
->fd_rdir 
== olddp
) { 
2149                         fdp
->fd_rdir 
= newdp
; 
2154         proc_dirs_unlock_exclusive(p
); 
2157          * Dispose of any references that are no longer needed. 
2159         if (old_cvp 
!= NULL
) { 
2160                 vnode_rele(old_cvp
); 
2162         if (old_rvp 
!= NULL
) { 
2163                 vnode_rele(old_rvp
); 
2165         if (new_cvp 
!= NULL
) { 
2166                 vnode_rele(new_cvp
); 
2168         if (new_rvp 
!= NULL
) { 
2169                 vnode_rele(new_rvp
); 
2172         return PROC_RETURNED
; 
2178  * Scan all active processes to see if any of them have a current 
2179  * or root directory onto which the new filesystem has just been 
2180  * mounted. If so, replace them with the new mount point. 
/*
 * checkdirs: fetch the root vnode of the filesystem mounted on olddp with
 * VFS_ROOT(olddp->v_mountedhere, ...) and run checkdirs_callback over all
 * processes (PROC_ALLPROCLIST | PROC_NOWAITTRANS) with a struct cdirargs.
 * The case rootvnode == olddp is handled with rootvnode_rw_lock held
 * exclusively.  The visible panic reports a lost mount together with the
 * error code from VFS_ROOT().
 *
 * NOTE(review): a usecount of exactly 1 on olddp is special-cased first; the
 * body of that check is not visible here (presumably an early return).  The
 * lines that fill in cdr and that update rootvnode are not visible either.
 */
2183 checkdirs(vnode_t olddp
, vfs_context_t ctx
) 
2188         struct cdirargs cdr
; 
2190         if (olddp
->v_usecount 
== 1) { 
2193         err 
= VFS_ROOT(olddp
->v_mountedhere
, &newdp
, ctx
); 
2197                 panic("mount: lost mount: error %d", err
); 
2204         /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */ 
2205         proc_iterate(PROC_ALLPROCLIST 
| PROC_NOWAITTRANS
, checkdirs_callback
, (void *)&cdr
, NULL
, NULL
); 
2207         if (rootvnode 
== olddp
) { 
2209                 lck_rw_lock_exclusive(&rootvnode_rw_lock
); 
2212                 lck_rw_unlock_exclusive(&rootvnode_rw_lock
); 
2221  * Unmount a file system. 
2223  * Note: unmount takes a path to the vnode mounted on as argument, 
2224  * not special file (as before). 
/*
 * unmount: system call entry point.  Sets up a lookup of the user-space
 * path uap->path (FOLLOW | AUDITVNPATH1, OP_UNMOUNT), runs the MAC check
 * mac_mount_check_umount(), tests that the vnode carries VROOT, and finally
 * hands the mount to safedounmount() together with uap->flags.
 * safedounmount() consumes the mount reference.
 *
 * NOTE(review): the namei() call, the error branches and the lines that
 * obtain mp and vp are not visible in this view.
 */
2228 unmount(__unused proc_t p
, struct unmount_args 
*uap
, __unused 
int32_t *retval
) 
2233         struct nameidata nd
; 
2234         vfs_context_t ctx 
= vfs_context_current(); 
2236         NDINIT(&nd
, LOOKUP
, OP_UNMOUNT
, FOLLOW 
| AUDITVNPATH1
, 
2237             UIO_USERSPACE
, uap
->path
, ctx
); 
2247         error 
= mac_mount_check_umount(ctx
, mp
); 
2254          * Must be the root of the filesystem 
2256         if ((vp
->v_flag 
& VROOT
) == 0) { 
2262         /* safedounmount consumes the mount ref */ 
2263         return safedounmount(mp
, uap
->flags
, ctx
); 
/*
 * vfs_unmountbyfsid: find the mount with the given fsid through
 * mount_list_lookupby_fsid(fsid, 0, 1) and unmount it via safedounmount(),
 * which consumes the mount reference.
 *
 * NOTE(review): the branch taken when the lookup yields no mount is not
 * visible in this view.
 */
2267 vfs_unmountbyfsid(fsid_t 
*fsid
, int flags
, vfs_context_t ctx
) 
2271         mp 
= mount_list_lookupby_fsid(fsid
, 0, 1); 
2272         if (mp 
== (mount_t
)0) { 
2277         /* safedounmount consumes the mount ref */ 
2278         return safedounmount(mp
, flags
, ctx
); 
/* Entitlement string that safedounmount() passes to IOTaskHasEntitlement(). */
2281 #define ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT        \ 
2282         "com.apple.private.vfs.role-account-unmount" 
2285  * The mount struct comes with a mount ref which will be consumed. 
2286  * Do the actual file system unmount, prevent some common foot shooting. 
/*
 * safedounmount: policy checks in front of dounmount().  Visible checks, in
 * order:
 *   - an unresponsive filesystem (MNT_LNOTRESP) with MNT_NOBLOCK requested
 *     and MNT_FORCE not requested;
 *   - authorization: unless the current task holds
 *     ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT, or the mount is MNTK_PERMIT_UNMOUNT
 *     and the request is not forced, the caller must either match the
 *     f_owner of the mount or pass suser();
 *   - MNT_ROOTFS or MNTK_SYSTEM mounts set error to EBUSY, and a message is
 *     printed when the mount is a system mount other than the root;
 *   - MNTK_BACKS_ROOT mounts are tested as well.
 * When no check objects, dounmount(mp, flags, 1, ctx) is called; the 1 is
 * its withref argument.
 *
 * NOTE(review): the bodies of several of these checks are not visible in
 * this view.
 */
2289 safedounmount(struct mount 
*mp
, int flags
, vfs_context_t ctx
) 
2292         proc_t p 
= vfs_context_proc(ctx
); 
2295          * If the file system is not responding and MNT_NOBLOCK 
2296          * is set and not a forced unmount then return EBUSY. 
2298         if ((mp
->mnt_kern_flag 
& MNT_LNOTRESP
) && 
2299             (flags 
& MNT_NOBLOCK
) && ((flags 
& MNT_FORCE
) == 0)) { 
2305          * Skip authorization in two cases: 
2306          * - If the process running the unmount has ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT. 
2307          *   This entitlement allows non-root processes unmount volumes mounted by 
2309          * - If the mount is tagged as permissive and this is not a forced-unmount 
2312         if (!IOTaskHasEntitlement(current_task(), ROLE_ACCOUNT_UNMOUNT_ENTITLEMENT
) && 
2313             (!(((mp
->mnt_kern_flag 
& MNTK_PERMIT_UNMOUNT
) != 0) && ((flags 
& MNT_FORCE
) == 0)))) { 
2315                  * Only root, or the user that did the original mount is 
2316                  * permitted to unmount this filesystem. 
2318                 if ((mp
->mnt_vfsstat
.f_owner 
!= kauth_cred_getuid(kauth_cred_get())) && 
2319                     (error 
= suser(kauth_cred_get(), &p
->p_acflag
))) { 
2324          * Don't allow unmounting the root file system, or other volumes 
2325          * associated with it (for example, the associated VM or DATA mounts) . 
2327         if ((mp
->mnt_flag 
& MNT_ROOTFS
) || (mp
->mnt_kern_flag 
& MNTK_SYSTEM
)) { 
2328                 if (!(mp
->mnt_flag 
& MNT_ROOTFS
)) { 
2329                         printf("attempt to unmount a system mount (%s), will return EBUSY\n", 
2330                             mp
->mnt_vfsstat
.f_mntonname
); 
2332                 error 
= EBUSY
; /* the root (or associated volumes) is always busy */ 
2337          * If the mount is providing the root filesystem's disk image 
2338          * (i.e. imageboot), don't allow unmounting 
2340         if (mp
->mnt_kern_flag 
& MNTK_BACKS_ROOT
) { 
2345         return dounmount(mp
, flags
, 1, ctx
); 
2353  * Do the actual file system unmount. 
/*
 * dounmount: perform the unmount of mp.
 *
 *   mp       mount to tear down
 *   flags    MNT_* flags; MNT_FORCE, MNT_NOBLOCK and MNT_LNOSUB are tested
 *   withref  NOTE(review): not used on any line visible in this view;
 *            safedounmount() and dounmount_submounts() pass 1
 *   ctx      context handed to the VFS_* operations and callbacks
 *
 * Visible sequence: post the fsevent for non-forced unmounts; refuse when
 * an unmount or mount is already in progress; mark the mount
 * MNTK_UNMOUNT / MNT_LUNMOUNT and clear MNT_ASYNC; drop the cached
 * mnt_realrootvp; unmount submounts when forced and MNT_LNOSUB is not set;
 * take mnt_rwlock exclusively; for non-forced unmounts release cached
 * vnodes and VFS_SYNC read-write mounts; vflush(); drain mount iterations;
 * VFS_UNMOUNT(); close the device vnode; remove the mount from the mount
 * list; detach it from the covered vnode; mark it MNT_LDEAD and wake
 * waiters; finally drop the references on the covered vnode.
 *
 * MNTK_UNMOUNT, MNT_LUNMOUNT and MNT_LFORCE are cleared again in three
 * places, each following VFS_SYNC, vflush and VFS_UNMOUNT respectively
 * (NOTE(review): presumably their failure paths; the conditions and the
 * jump targets are not visible in this view).
 */
2356 dounmount(struct mount 
*mp
, int flags
, int withref
, vfs_context_t ctx
) 
2358         vnode_t coveredvp 
= (vnode_t
)0; 
2361         int forcedunmount 
= 0; 
2363         struct vnode 
*devvp 
= NULLVP
; 
2365         proc_t p 
= vfs_context_proc(ctx
); 
2367         int pflags_save 
= 0; 
2368 #endif /* CONFIG_TRIGGERS */ 
2371         if (!(flags 
& MNT_FORCE
)) { 
2372                 fsevent_unmount(mp
, ctx
);  /* has to come first! */ 
2379          * If already an unmount in progress just return EBUSY. 
2380          * Even a forced unmount cannot override. 
2382         if (mp
->mnt_lflag 
& (MNT_LUNMOUNT 
| MNT_LMOUNT
)) { 
2390         if (flags 
& MNT_FORCE
) { 
2392                 mp
->mnt_lflag 
|= MNT_LFORCE
; 
2396         if (flags 
& MNT_NOBLOCK 
&& p 
!= kernproc
) { 
2397                 pflags_save 
= OSBitOrAtomic(P_NOREMOTEHANG
, &p
->p_flag
); 
2401         mp
->mnt_kern_flag 
|= MNTK_UNMOUNT
; 
2402         mp
->mnt_lflag 
|= MNT_LUNMOUNT
; 
2403         mp
->mnt_flag 
&= ~MNT_ASYNC
; 
2405          * anyone currently in the fast path that 
2406          * trips over the cached rootvp will be 
2407          * dumped out and forced into the slow path 
2408          * to regenerate a new cached value 
2410         mp
->mnt_realrootvp 
= NULLVP
; 
2413         if (forcedunmount 
&& (flags 
& MNT_LNOSUB
) == 0) { 
2415                  * Force unmount any mounts in this filesystem. 
2416                  * If any unmounts fail - just leave them dangling. 
2419                 (void) dounmount_submounts(mp
, flags 
| MNT_LNOSUB
, ctx
); 
2423          * taking the name_cache_lock exclusively will 
2424          * insure that everyone is out of the fast path who 
2425          * might be trying to use a now stale copy of 
2426          * vp->v_mountedhere->mnt_realrootvp 
2427          * bumping mount_generation causes the cached values 
2432         name_cache_unlock(); 
2435         lck_rw_lock_exclusive(&mp
->mnt_rwlock
); 
2440         if (forcedunmount 
== 0) { 
2441                 ubc_umount(mp
); /* release cached vnodes */ 
2442                 if ((mp
->mnt_flag 
& MNT_RDONLY
) == 0) { 
2443                         error 
= VFS_SYNC(mp
, MNT_WAIT
, ctx
); 
2446                                 mp
->mnt_kern_flag 
&= ~MNTK_UNMOUNT
; 
2447                                 mp
->mnt_lflag 
&= ~MNT_LUNMOUNT
; 
2448                                 mp
->mnt_lflag 
&= ~MNT_LFORCE
; 
2454         IOBSDMountChange(mp
, kIOMountChangeUnmount
); 
2457         vfs_nested_trigger_unmounts(mp
, flags
, ctx
); 
2460         if (forcedunmount
) { 
2461                 lflags 
|= FORCECLOSE
; 
2463         error 
= vflush(mp
, NULLVP
, SKIPSWAP 
| SKIPSYSTEM  
| SKIPROOT 
| lflags
); 
2464         if ((forcedunmount 
== 0) && error
) { 
2466                 mp
->mnt_kern_flag 
&= ~MNTK_UNMOUNT
; 
2467                 mp
->mnt_lflag 
&= ~MNT_LUNMOUNT
; 
2468                 mp
->mnt_lflag 
&= ~MNT_LFORCE
; 
2472         /* make sure there is no one in the mount iterations or lookup */ 
2473         mount_iterdrain(mp
); 
2475         error 
= VFS_UNMOUNT(mp
, flags
, ctx
); 
2477                 mount_iterreset(mp
); 
2479                 mp
->mnt_kern_flag 
&= ~MNTK_UNMOUNT
; 
2480                 mp
->mnt_lflag 
&= ~MNT_LUNMOUNT
; 
2481                 mp
->mnt_lflag 
&= ~MNT_LFORCE
; 
2485         /* increment the operations count */ 
2487                 OSAddAtomic(1, &vfs_nummntops
); 
2490         if (mp
->mnt_devvp 
&& mp
->mnt_vtable
->vfc_vfsflags 
& VFC_VFSLOCALARGS
) { 
2491                 /* hold an io reference and drop the usecount before close */ 
2492                 devvp 
= mp
->mnt_devvp
; 
2493                 vnode_getalways(devvp
); 
2495                 VNOP_CLOSE(devvp
, mp
->mnt_flag 
& MNT_RDONLY 
? FREAD 
: FREAD 
| FWRITE
, 
2497                 vnode_clearmountedon(devvp
); 
2500         lck_rw_done(&mp
->mnt_rwlock
); 
2501         mount_list_remove(mp
); 
2502         lck_rw_lock_exclusive(&mp
->mnt_rwlock
); 
2504         /* mark the mount point hook in the vp but not drop the ref yet */ 
2505         if ((coveredvp 
= mp
->mnt_vnodecovered
) != NULLVP
) { 
2507                  * The covered vnode needs special handling. Trying to get an 
2508                  * iocount must not block here as this may lead to deadlocks 
2509                  * if the Filesystem to which the covered vnode belongs is 
2510                  * undergoing forced unmounts. Since we hold a usecount, the 
2511                  * vnode cannot be reused (it can, however, still be terminated) 
2513                 vnode_getalways(coveredvp
); 
2514                 vnode_lock_spin(coveredvp
); 
2517                 coveredvp
->v_mountedhere 
= (struct mount 
*)0; 
2518                 CLR(coveredvp
->v_flag
, VMOUNT
); 
2520                 vnode_unlock(coveredvp
); 
2521                 vnode_put(coveredvp
); 
2525         mp
->mnt_vtable
->vfc_refcount
--; 
2526         mount_list_unlock(); 
2528         cache_purgevfs(mp
);     /* remove cache entries for this file sys */ 
2529         vfs_event_signal(NULL
, VQ_UNMOUNT
, (intptr_t)NULL
); 
2531         mp
->mnt_lflag 
|= MNT_LDEAD
; 
2533         if (mp
->mnt_lflag 
& MNT_LWAIT
) { 
2535                  * do the wakeup here 
2536                  * in case we block in mount_refdrain 
2537                  * which will drop the mount lock 
2538                  * and allow anyone blocked in vfs_busy 
2539                  * to wakeup and see the LDEAD state 
2541                 mp
->mnt_lflag 
&= ~MNT_LWAIT
; 
2542                 wakeup((caddr_t
)mp
); 
2546         /* free disk_conditioner_info structure for this mount */ 
2547         disk_conditioner_unmount(mp
); 
2550         if (mp
->mnt_lflag 
& MNT_LWAIT
) { 
2551                 mp
->mnt_lflag 
&= ~MNT_LWAIT
; 
2556         if (flags 
& MNT_NOBLOCK 
&& p 
!= kernproc
) { 
2557                 // Restore P_NOREMOTEHANG bit to its previous value 
2558                 if ((pflags_save 
& P_NOREMOTEHANG
) == 0) { 
2559                         OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG
), &p
->p_flag
); 
2564          * Callback and context are set together under the mount lock, and 
2565          * never cleared, so we're safe to examine them here, drop the lock, 
2568         if (mp
->mnt_triggercallback 
!= NULL
) { 
2571                         mp
->mnt_triggercallback(mp
, VTC_RELEASE
, mp
->mnt_triggerdata
, ctx
); 
2572                 } else if (did_vflush
) { 
2573                         mp
->mnt_triggercallback(mp
, VTC_REPLACE
, mp
->mnt_triggerdata
, ctx
); 
2580 #endif /* CONFIG_TRIGGERS */ 
2582         lck_rw_done(&mp
->mnt_rwlock
); 
2585                 wakeup((caddr_t
)mp
); 
2589                 if ((coveredvp 
!= NULLVP
)) { 
2590                         vnode_t pvp 
= NULLVP
; 
2593                          * The covered vnode needs special handling. Trying to 
2594                          * get an iocount must not block here as this may lead 
2595                          * to deadlocks if the Filesystem to which the covered 
2596                          * vnode belongs is undergoing forced unmounts. Since we 
2597                          * hold a usecount, the  vnode cannot be reused 
2598                          * (it can, however, still be terminated). 
2600                         vnode_getalways(coveredvp
); 
2602                         mount_dropcrossref(mp
, coveredvp
, 0); 
2604                          * We'll _try_ to detect if this really needs to be 
2605                          * done. The coveredvp can only be in termination (or 
2606                          * terminated) if the coveredvp's mount point is in a 
2607                          * forced unmount (or has been) since we still hold the 
2610                         if (!vnode_isrecycled(coveredvp
)) { 
2611                                 pvp 
= vnode_getparent(coveredvp
); 
2613                                 if (coveredvp
->v_resolve
) { 
2614                                         vnode_trigger_rearm(coveredvp
, ctx
); 
2619                         vnode_rele(coveredvp
); 
2620                         vnode_put(coveredvp
); 
2624                                 lock_vnode_and_post(pvp
, NOTE_WRITE
); 
2627                 } else if (mp
->mnt_flag 
& MNT_ROOTFS
) { 
2628                         mount_lock_destroy(mp
); 
2630                         mac_mount_label_destroy(mp
); 
2632                         zfree(mount_zone
, mp
); 
2634                         panic("dounmount: no coveredvp"); 
2641  * Unmount any mounts in this filesystem. 
/*
 * dounmount_submounts: unmount the mounts stacked, directly or indirectly,
 * on top of mp.
 *
 * A temporary array of fsids is allocated from KHEAP_TEMP with Z_NOWAIT and
 * primed with the fsid of mp.  Each later entry of the mount list whose
 * covered vnode belongs to a mount already in the array has its own fsid
 * appended.  After the mount list is unlocked, entries m down to 1 (that
 * is, everything except mp itself) are looked up again by fsid and passed
 * to dounmount() with withref 1; errors are ignored.  The array is freed at
 * the end.
 *
 * NOTE(review): the counting loop body, the mount list lock call and the
 * rest of the allocation-failure branch are not visible in this view.
 */
2644 dounmount_submounts(struct mount 
*mp
, int flags
, vfs_context_t ctx
) 
2647         fsid_t 
*fsids
, fsid
; 
2649         int count 
= 0, i
, m 
= 0; 
2654         // Get an array to hold the submounts fsids. 
2655         TAILQ_FOREACH(smp
, &mountlist
, mnt_list
) 
2657         fsids_sz 
= count 
* sizeof(fsid_t
); 
2658         fsids 
= kheap_alloc(KHEAP_TEMP
, fsids_sz
, Z_NOWAIT
); 
2659         if (fsids 
== NULL
) { 
2660                 mount_list_unlock(); 
2663         fsids
[0] = mp
->mnt_vfsstat
.f_fsid
;      // Prime the pump 
2666          * Fill the array with submount fsids. 
2667          * Since mounts are always added to the tail of the mount list, the 
2668          * list is always in mount order. 
2669          * For each mount check if the mounted-on vnode belongs to a 
2670          * mount that's already added to our array of mounts to be unmounted. 
2672         for (smp 
= TAILQ_NEXT(mp
, mnt_list
); smp
; smp 
= TAILQ_NEXT(smp
, mnt_list
)) { 
2673                 vp 
= smp
->mnt_vnodecovered
; 
2677                 fsid 
= vnode_mount(vp
)->mnt_vfsstat
.f_fsid
;     // Underlying fsid 
2678                 for (i 
= 0; i 
<= m
; i
++) { 
2679                         if (fsids
[i
].val
[0] == fsid
.val
[0] && 
2680                             fsids
[i
].val
[1] == fsid
.val
[1]) { 
2681                                 fsids
[++m
] = smp
->mnt_vfsstat
.f_fsid
; 
2686         mount_list_unlock(); 
2688         // Unmount the submounts in reverse order. Ignore errors. 
2689         for (i 
= m
; i 
> 0; i
--) { 
2690                 smp 
= mount_list_lookupby_fsid(&fsids
[i
], 0, 1); 
2693                         mount_iterdrop(smp
); 
2694                         (void) dounmount(smp
, flags
, 1, ctx
); 
2698         kheap_free(KHEAP_TEMP
, fsids
, fsids_sz
); 
/*
 * mount_dropcrossref: cross-reference bookkeeping between mount mp and
 * vnode dp.  A negative mnt_crossref panics.  When mp is not the mount
 * recorded in dp->v_mountedhere and mnt_crossref is zero, the mount
 * structure is destroyed: its lock, its MAC label, and then the structure
 * itself is returned to mount_zone.
 *
 * NOTE(review): the line that changes mnt_crossref and the conditions
 * around the two vnode_put_locked(dp) calls (presumably need_put) are not
 * visible in this view.
 */
2702 mount_dropcrossref(mount_t mp
, vnode_t dp
, int need_put
) 
2707         if (mp
->mnt_crossref 
< 0) { 
2708                 panic("mount cross refs -ve"); 
2711         if ((mp 
!= dp
->v_mountedhere
) && (mp
->mnt_crossref 
== 0)) { 
2713                         vnode_put_locked(dp
); 
2717                 mount_lock_destroy(mp
); 
2719                 mac_mount_label_destroy(mp
); 
2721                 zfree(mount_zone
, mp
); 
2725                 vnode_put_locked(dp
); 
2732  * Sync each mounted filesystem. 
/* When non-zero, sync() and sync_thread() call vm_countdirtypages(). */
2738 int print_vmpage_stat 
= 0; 
2741  * sync_callback:       simple wrapper that calls VFS_SYNC() on volumes 
2742  *                      mounted read-write with the passed waitfor value. 
2744  * Parameters:  mp      mount-point descriptor per mounted file-system instance. 
2745  *              arg     user argument (please see below) 
2747  * User argument is a pointer to 32 bit unsigned integer which describes the 
2748  * type of waitfor value to set for calling VFS_SYNC().  If user argument is 
2749  * passed as NULL, VFS_SYNC() is called with MNT_NOWAIT set as the default 
2752  * Returns:             VFS_RETURNED 
/*
 * sync_callback: call VFS_SYNC() on mp with the kernel VFS context unless
 * the mount is MNT_RDONLY.  The waitfor value defaults to MNT_NOWAIT and
 * can be overridden through arg, which is then read as a uint32_t.  Only
 * MNT_WAIT, MNT_NOWAIT and MNT_DWAIT, each optionally combined with
 * MNT_VOLUME, are accepted; anything else panics.  MNT_ASYNC is cleared
 * before the VFS_SYNC() call and set again afterwards (NOTE(review):
 * presumably only when asyncflag recorded it as set; that condition is not
 * visible in this view).  The VFS_SYNC() result is discarded and the
 * function returns VFS_RETURNED.
 */
2755 sync_callback(mount_t mp
, void *arg
) 
2757         if ((mp
->mnt_flag 
& MNT_RDONLY
) == 0) { 
2758                 int asyncflag 
= mp
->mnt_flag 
& MNT_ASYNC
; 
2759                 unsigned waitfor 
= MNT_NOWAIT
; 
2762                         waitfor 
= *(uint32_t*)arg
; 
2765                 /* Sanity check for flags - these are the only valid combinations for the flag bits*/ 
2766                 if (waitfor 
!= MNT_WAIT 
&& 
2767                     waitfor 
!= (MNT_WAIT 
| MNT_VOLUME
) && 
2768                     waitfor 
!= MNT_NOWAIT 
&& 
2769                     waitfor 
!= (MNT_NOWAIT 
| MNT_VOLUME
) && 
2770                     waitfor 
!= MNT_DWAIT 
&& 
2771                     waitfor 
!= (MNT_DWAIT 
| MNT_VOLUME
)) { 
2772                         panic("Passed inappropriate waitfor %u to " 
2773                             "sync_callback()", waitfor
); 
2776                 mp
->mnt_flag 
&= ~MNT_ASYNC
; 
2777                 (void)VFS_SYNC(mp
, waitfor
, vfs_context_kernel()); 
2779                         mp
->mnt_flag 
|= MNT_ASYNC
; 
2783         return VFS_RETURNED
; 
/*
 * sync: system call.  Runs sync_callback over the mounts through
 * vfs_iterate(LK_NOWAIT, ...) with a NULL argument, so sync_callback keeps
 * its MNT_NOWAIT default.  When print_vmpage_stat is set,
 * vm_countdirtypages() is called as well; the closing DIAGNOSTIC
 * conditional below belongs to lines not visible in this view.
 */
2788 sync(__unused proc_t p
, __unused 
struct sync_args 
*uap
, __unused 
int32_t *retval
) 
2790         vfs_iterate(LK_NOWAIT
, sync_callback
, NULL
); 
2792         if (print_vmpage_stat
) { 
2793                 vm_countdirtypages(); 
2800 #endif /* DIAGNOSTIC */ 
2806         SYNC_ONLY_RELIABLE_MEDIA 
= 1, 
2807         SYNC_ONLY_UNRELIABLE_MEDIA 
= 2 
/*
 * sync_internal_callback: filter in front of sync_callback().  A mount
 * counts as reliable when it is MNT_LOCAL and not MNTK_VIRTUALDEV.  arg is
 * read as a sync_type_t: with SYNC_ONLY_RELIABLE_MEDIA unreliable mounts
 * are skipped, with SYNC_ONLY_UNRELIABLE_MEDIA reliable mounts are skipped.
 * Mounts that pass are handed to sync_callback() with a NULL argument.
 * Every visible return is VFS_RETURNED.
 *
 * NOTE(review): the condition guarding the filtering block is not visible
 * in this view.
 */
2811 sync_internal_callback(mount_t mp
, void *arg
) 
2814                 int is_reliable 
= !(mp
->mnt_kern_flag 
& MNTK_VIRTUALDEV
) && 
2815                     (mp
->mnt_flag 
& MNT_LOCAL
); 
2816                 sync_type_t sync_type 
= *((sync_type_t 
*)arg
); 
2818                 if ((sync_type 
== SYNC_ONLY_RELIABLE_MEDIA
) && !is_reliable
) { 
2819                         return VFS_RETURNED
; 
2820                 } else if ((sync_type 
== SYNC_ONLY_UNRELIABLE_MEDIA
) && is_reliable
) { 
2821                         return VFS_RETURNED
; 
2825         (void)sync_callback(mp
, NULL
); 
2827         return VFS_RETURNED
; 
/*
 * State shared between sync_internal() and sync_thread(); both modify
 * sync_thread_state with sync_mtx_lck held.  SYNC_THREAD_RUN requests
 * another pass over the mounts and is cleared by sync_thread at the start
 * of each pass.  SYNC_THREAD_RUNNING is set by sync_internal() before it
 * starts a sync thread and cleared by sync_thread() on its way out.
 * sync_timeout_seconds is the timeout sync_internal() passes to msleep().
 * pm_sync_thread is set to the current thread by sync_thread() and reset to
 * NULL before that thread finishes.
 */
2830 int sync_thread_state 
= 0; 
2831 int sync_timeout_seconds 
= 5; 
2833 #define SYNC_THREAD_RUN       0x0001 
2834 #define SYNC_THREAD_RUNNING   0x0002 
2836 #if CONFIG_PHYS_WRITE_ACCT 
2837 thread_t pm_sync_thread
; 
2838 #endif /* CONFIG_PHYS_WRITE_ACCT */ 
/*
 * sync_thread: thread body passed to kernel_thread_start() by
 * sync_internal().  While SYNC_THREAD_RUN is set, it clears the flag,
 * releases sync_mtx_lck, iterates the mounts once for
 * SYNC_ONLY_RELIABLE_MEDIA and once for SYNC_ONLY_UNRELIABLE_MEDIA, and
 * re-takes the mutex.  On the way out it wakes sleepers on
 * sync_thread_state and clears SYNC_THREAD_RUNNING, both before the mutex
 * is released.
 */
2841 sync_thread(__unused 
void *arg
, __unused wait_result_t wr
) 
2843         sync_type_t sync_type
; 
2844 #if CONFIG_PHYS_WRITE_ACCT 
2845         pm_sync_thread 
= current_thread(); 
2846 #endif /* CONFIG_PHYS_WRITE_ACCT */ 
2848         lck_mtx_lock(&sync_mtx_lck
); 
2849         while (sync_thread_state 
& SYNC_THREAD_RUN
) { 
2850                 sync_thread_state 
&= ~SYNC_THREAD_RUN
; 
2851                 lck_mtx_unlock(&sync_mtx_lck
); 
2853                 sync_type 
= SYNC_ONLY_RELIABLE_MEDIA
; 
2854                 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
); 
2855                 sync_type 
= SYNC_ONLY_UNRELIABLE_MEDIA
; 
2856                 vfs_iterate(LK_NOWAIT
, sync_internal_callback
, &sync_type
); 
2858                 lck_mtx_lock(&sync_mtx_lck
); 
2861          * This wakeup _has_ to be issued before the lock is released otherwise 
2862          * we may end up waking up a thread in sync_internal which is 
2863          * expecting a wakeup from a thread it just created and not from this 
2864          * thread which is about to exit. 
2866         wakeup(&sync_thread_state
); 
2867         sync_thread_state 
&= ~SYNC_THREAD_RUNNING
; 
2868 #if CONFIG_PHYS_WRITE_ACCT 
2869         pm_sync_thread 
= NULL
; 
2870 #endif /* CONFIG_PHYS_WRITE_ACCT */ 
2871         lck_mtx_unlock(&sync_mtx_lck
); 
2873         if (print_vmpage_stat
) { 
2874                 vm_countdirtypages(); 
2881 #endif /* DIAGNOSTIC */ 
/*
 * Time at which sync_internal() last printed its timeout message; the
 * message is printed again only once more than 120 seconds have passed.
 */
2884 struct timeval sync_timeout_last_print 
= {.tv_sec 
= 0, .tv_usec 
= 0}; 
2887  * An in-kernel sync for power management to call. 
2888  * This function always returns within sync_timeout seconds. 
/*
 * sync_internal: request a sync pass by setting SYNC_THREAD_RUN under
 * sync_mtx_lck.  If SYNC_THREAD_RUNNING is not set, the flag is set and a
 * sync_thread is started with kernel_thread_start(); if that fails the flag
 * is cleared again, the mutex released and a message printed.  The caller
 * then sleeps on sync_thread_state for at most sync_timeout_seconds; PDROP
 * is among the msleep flags.  The timeout message is rate-limited through
 * sync_timeout_last_print.  The thread reference obtained from
 * kernel_thread_start() is released with thread_deallocate() when a thread
 * was created here.
 *
 * NOTE(review): the line carrying the function name and the return
 * statements are not visible in this view.
 */
2890 __private_extern__ 
int 
2895         int thread_created 
= FALSE
; 
2896         struct timespec ts 
= {.tv_sec 
= sync_timeout_seconds
, .tv_nsec 
= 0}; 
2898         lck_mtx_lock(&sync_mtx_lck
); 
2899         sync_thread_state 
|= SYNC_THREAD_RUN
; 
2900         if (!(sync_thread_state 
& SYNC_THREAD_RUNNING
)) { 
2903                 sync_thread_state 
|= SYNC_THREAD_RUNNING
; 
2904                 kr 
= kernel_thread_start(sync_thread
, NULL
, &thd
); 
2905                 if (kr 
!= KERN_SUCCESS
) { 
2906                         sync_thread_state 
&= ~SYNC_THREAD_RUNNING
; 
2907                         lck_mtx_unlock(&sync_mtx_lck
); 
2908                         printf("sync_thread failed\n"); 
2911                 thread_created 
= TRUE
; 
2914         error 
= msleep((caddr_t
)&sync_thread_state
, &sync_mtx_lck
, 
2915             (PVFS 
| PDROP 
| PCATCH
), "sync_thread", &ts
); 
2920                 if (now
.tv_sec 
- sync_timeout_last_print
.tv_sec 
> 120) { 
2921                         printf("sync timed out: %d sec\n", sync_timeout_seconds
); 
2922                         sync_timeout_last_print
.tv_sec 
= now
.tv_sec
; 
2926         if (thread_created
) { 
2927                 thread_deallocate(thd
); 
2931 } /* end of sync_internal call */ 
2934  * Change filesystem quotas. 
/*
 * quotactl: system call that forwards a quota command to the filesystem of
 * a looked-up path through VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx).
 *
 * The sub-command (uap->cmd >> SUBCMDSHIFT) selects how uap->arg is
 * marshalled into datap beforehand:
 *   - a path string copied with copyinstr() into a ZV_NAMEI buffer;
 *   - a dqblk to be filled in by the filesystem (my_dqblk);
 *   - a dqblk copied in from user space, going through struct user_dqblk
 *     and munge_dqblk() for 64-bit processes;
 *   - an integer (quota_status).
 * A second switch on the same sub-command frees the ZV_NAMEI buffer and
 * copies dqblk or integer results back out to uap->arg, again converting
 * through struct user_dqblk for 64-bit processes.
 *
 * NOTE(review): the case labels of both switch statements, the namei() call
 * and the error checks are not visible in this view.
 */
2938 quotactl(proc_t p
, struct quotactl_args 
*uap
, __unused 
int32_t *retval
) 
2941         int error
, quota_cmd
, quota_status 
= 0; 
2944         struct nameidata nd
; 
2945         vfs_context_t ctx 
= vfs_context_current(); 
2946         struct dqblk my_dqblk 
= {}; 
2948         AUDIT_ARG(uid
, uap
->uid
); 
2949         AUDIT_ARG(cmd
, uap
->cmd
); 
2950         NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, FOLLOW 
| AUDITVNPATH1
, UIO_USERSPACE
, 
2956         mp 
= nd
.ni_vp
->v_mount
; 
2958         vnode_put(nd
.ni_vp
); 
2961         /* copyin any data we will need for downstream code */ 
2962         quota_cmd 
= uap
->cmd 
>> SUBCMDSHIFT
; 
2964         switch (quota_cmd
) { 
2966                 /* uap->arg specifies a file from which to take the quotas */ 
2967                 fnamelen 
= MAXPATHLEN
; 
2968                 datap 
= zalloc(ZV_NAMEI
); 
2969                 error 
= copyinstr(uap
->arg
, datap
, MAXPATHLEN
, &fnamelen
); 
2972                 /* uap->arg is a pointer to a dqblk structure. */ 
2973                 datap 
= (caddr_t
) &my_dqblk
; 
2977                 /* uap->arg is a pointer to a dqblk structure. */ 
2978                 datap 
= (caddr_t
) &my_dqblk
; 
2979                 if (proc_is64bit(p
)) { 
2980                         struct user_dqblk       my_dqblk64
; 
2981                         error 
= copyin(uap
->arg
, (caddr_t
)&my_dqblk64
, sizeof(my_dqblk64
)); 
2983                                 munge_dqblk(&my_dqblk
, &my_dqblk64
, FALSE
); 
2986                         error 
= copyin(uap
->arg
, (caddr_t
)&my_dqblk
, sizeof(my_dqblk
)); 
2990                 /* uap->arg is a pointer to an integer */ 
2991                 datap 
= (caddr_t
) &quota_status
; 
2999                 error 
= VFS_QUOTACTL(mp
, uap
->cmd
, uap
->uid
, datap
, ctx
); 
3002         switch (quota_cmd
) { 
3004                 if (datap 
!= NULL
) { 
3005                         zfree(ZV_NAMEI
, datap
); 
3009                 /* uap->arg is a pointer to a dqblk structure we need to copy out to */ 
3011                         if (proc_is64bit(p
)) { 
3012                                 struct user_dqblk       my_dqblk64
; 
3014                                 memset(&my_dqblk64
, 0, sizeof(my_dqblk64
)); 
3015                                 munge_dqblk(&my_dqblk
, &my_dqblk64
, TRUE
); 
3016                                 error 
= copyout((caddr_t
)&my_dqblk64
, uap
->arg
, sizeof(my_dqblk64
)); 
3018                                 error 
= copyout(datap
, uap
->arg
, sizeof(struct dqblk
)); 
3023                 /* uap->arg is a pointer to an integer */ 
3025                         error 
= copyout(datap
, uap
->arg
, sizeof(quota_status
)); 
/*
 * quotactl: alternate definition whose arguments are all marked __unused.
 * NOTE(review): the preprocessor conditional that selects this variant and
 * its body are not visible in this view.
 */
3037 quotactl(__unused proc_t p
, __unused 
struct quotactl_args 
*uap
, __unused 
int32_t *retval
) 
3044  * Get filesystem statistics. 
3046  * Returns:     0                       Success 
3048  *      vfs_update_vfsstat:??? 
3049  *      munge_statfs:EFAULT 
/*
 * statfs: system call.  Sets up a lookup of the user-space path uap->path
 * (FOLLOW | AUDITVNPATH1, OP_STATFS), runs mac_mount_check_stat() on the
 * mount, refreshes mp->mnt_vfsstat with
 * vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT) and passes the statistics to
 * munge_statfs() with uap->buf as destination.
 *
 * NOTE(review): p is marked __unused but is passed to IS_64BIT_PROCESS().
 * The namei() call and the error checks are not visible in this view.
 */
3053 statfs(__unused proc_t p
, struct statfs_args 
*uap
, __unused 
int32_t *retval
) 
3056         struct vfsstatfs 
*sp
; 
3058         struct nameidata nd
; 
3059         vfs_context_t ctx 
= vfs_context_current(); 
3062         NDINIT(&nd
, LOOKUP
, OP_STATFS
, FOLLOW 
| AUDITVNPATH1
, 
3063             UIO_USERSPACE
, uap
->path
, ctx
); 
3070         sp 
= &mp
->mnt_vfsstat
; 
3074         error 
= mac_mount_check_stat(ctx
, mp
); 
3081         error 
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
); 
3087         error 
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
); 
3093  * Get filesystem statistics. 
/*
 * fstatfs: descriptor-based counterpart of statfs.  The vnode behind
 * uap->fd is obtained with file_vnode() and vnode_getwithref(); the mount
 * is then checked with mac_mount_check_stat(), its statistics refreshed
 * with vfs_update_vfsstat(VFS_USER_EVENT) and passed to munge_statfs() with
 * uap->buf as destination.
 *
 * NOTE(review): p is marked __unused but is passed to IS_64BIT_PROCESS().
 * The lines that derive mp from vp and the cleanup paths are not visible in
 * this view.
 */
3097 fstatfs(__unused proc_t p
, struct fstatfs_args 
*uap
, __unused 
int32_t *retval
) 
3101         struct vfsstatfs 
*sp
; 
3104         AUDIT_ARG(fd
, uap
->fd
); 
3106         if ((error 
= file_vnode(uap
->fd
, &vp
))) { 
3110         error 
= vnode_getwithref(vp
); 
3116         AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
); 
3125         error 
= mac_mount_check_stat(vfs_context_current(), mp
); 
3131         sp 
= &mp
->mnt_vfsstat
; 
3132         if ((error 
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) { 
3136         error 
= munge_statfs(mp
, sp
, uap
->buf
, NULL
, IS_64BIT_PROCESS(p
), TRUE
); 
/*
 * vfs_get_statfs64: fill *sfs from the statistics stored in
 * mp->mnt_vfsstat.  The destination is zeroed first.  Most fields are
 * copied straight across (f_iosize through an int32_t cast).  f_type comes
 * from mp->mnt_vtable->vfc_typenum, f_flags is mnt_flag masked with
 * MNT_VISFLAGMASK, and f_flags_ext is MNT_EXT_ROOT_DATA_VOL for
 * MNTK_SYSTEMDATA mounts and 0 otherwise.  The type name is taken from
 * mp->fstypename_override when MNTK_TYPENAME_OVERRIDE is set, otherwise
 * from the stored f_fstypename.  All strings are copied with strlcpy().
 */
3146 vfs_get_statfs64(struct mount 
*mp
, struct statfs64 
*sfs
) 
3148         struct vfsstatfs 
*vsfs 
= &mp
->mnt_vfsstat
; 
3150         bzero(sfs
, sizeof(*sfs
)); 
3152         sfs
->f_bsize 
= vsfs
->f_bsize
; 
3153         sfs
->f_iosize 
= (int32_t)vsfs
->f_iosize
; 
3154         sfs
->f_blocks 
= vsfs
->f_blocks
; 
3155         sfs
->f_bfree 
= vsfs
->f_bfree
; 
3156         sfs
->f_bavail 
= vsfs
->f_bavail
; 
3157         sfs
->f_files 
= vsfs
->f_files
; 
3158         sfs
->f_ffree 
= vsfs
->f_ffree
; 
3159         sfs
->f_fsid 
= vsfs
->f_fsid
; 
3160         sfs
->f_owner 
= vsfs
->f_owner
; 
3161         sfs
->f_type 
= mp
->mnt_vtable
->vfc_typenum
; 
3162         sfs
->f_flags 
= mp
->mnt_flag 
& MNT_VISFLAGMASK
; 
3163         sfs
->f_fssubtype 
= vsfs
->f_fssubtype
; 
3164         sfs
->f_flags_ext 
= (mp
->mnt_kern_flag 
& MNTK_SYSTEMDATA
) ? MNT_EXT_ROOT_DATA_VOL 
: 0; 
3165         if (mp
->mnt_kern_flag 
& MNTK_TYPENAME_OVERRIDE
) { 
3166                 strlcpy(&sfs
->f_fstypename
[0], &mp
->fstypename_override
[0], MFSTYPENAMELEN
); 
3168                 strlcpy(&sfs
->f_fstypename
[0], &vsfs
->f_fstypename
[0], MFSTYPENAMELEN
); 
3170         strlcpy(&sfs
->f_mntonname
[0], &vsfs
->f_mntonname
[0], MAXPATHLEN
); 
3171         strlcpy(&sfs
->f_mntfromname
[0], &vsfs
->f_mntfromname
[0], MAXPATHLEN
); 
3175  * Get file system statistics in 64-bit mode 
/*
 * statfs64: system call returning a struct statfs64 for the mount of
 * uap->path.  The nameidata and the statfs64 result share one KHEAP_TEMP
 * allocation, which is freed at the end.  After mac_mount_check_stat() and
 * vfs_update_vfsstat(VFS_USER_EVENT), vfs_get_statfs64() fills the result.
 * For MNTK_SYSTEMDATA mounts, a process whose p_vfs_iopolicy has
 * P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME set gets f_mntonname overwritten
 * with the root path.  The result is copied out to uap->buf.
 *
 * NOTE(review): p is marked __unused but p->p_vfs_iopolicy is read.  The
 * allocation flags, the namei() call and the error checks are not visible
 * in this view.
 */
3178 statfs64(__unused 
struct proc 
*p
, struct statfs64_args 
*uap
, __unused 
int32_t *retval
) 
3182         struct nameidata 
*ndp
; 
3183         struct statfs64 
*sfsp
; 
3184         vfs_context_t ctxp 
= vfs_context_current(); 
3187                 struct nameidata nd
; 
3188                 struct statfs64 sfs
; 
3189         } *__nameidata_statfs64
; 
3191         __nameidata_statfs64 
= kheap_alloc(KHEAP_TEMP
, sizeof(*__nameidata_statfs64
), 
3193         ndp 
= &__nameidata_statfs64
->nd
; 
3195         NDINIT(ndp
, LOOKUP
, OP_STATFS
, FOLLOW 
| AUDITVNPATH1
, 
3196             UIO_USERSPACE
, uap
->path
, ctxp
); 
3206         error 
= mac_mount_check_stat(ctxp
, mp
); 
3213         error 
= vfs_update_vfsstat(mp
, ctxp
, VFS_USER_EVENT
); 
3219         sfsp 
= &__nameidata_statfs64
->sfs
; 
3220         vfs_get_statfs64(mp
, sfsp
); 
3221         if ((mp
->mnt_kern_flag 
& MNTK_SYSTEMDATA
) && 
3222             (p
->p_vfs_iopolicy 
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) { 
3223                 /* This process does not want to see a separate data volume mountpoint */ 
3224                 strlcpy(&sfsp
->f_mntonname
[0], "/", sizeof("/")); 
3226         error 
= copyout(sfsp
, uap
->buf
, sizeof(*sfsp
)); 
3230         kheap_free(KHEAP_TEMP
, __nameidata_statfs64
, sizeof(*__nameidata_statfs64
)); 
3236  * Get file system statistics in 64-bit mode 
3239 fstatfs64(__unused 
struct proc 
*p
, struct fstatfs64_args 
*uap
, __unused 
int32_t *retval
) 
3243         struct statfs64 sfs
; 
3246         AUDIT_ARG(fd
, uap
->fd
); 
3248         if ((error 
= file_vnode(uap
->fd
, &vp
))) { 
3252         error 
= vnode_getwithref(vp
); 
3258         AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
); 
3267         error 
= mac_mount_check_stat(vfs_context_current(), mp
); 
3273         if ((error 
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)) != 0) { 
3277         vfs_get_statfs64(mp
, &sfs
); 
3278         if ((mp
->mnt_kern_flag 
& MNTK_SYSTEMDATA
) && 
3279             (p
->p_vfs_iopolicy 
& P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME
)) { 
3280                 /* This process does not want to see a separate data volume mountpoint */ 
3281                 strlcpy(&sfs
.f_mntonname
[0], "/", sizeof("/")); 
3283         error 
= copyout(&sfs
, uap
->buf
, sizeof(sfs
)); 
3292 struct getfsstat_struct 
{ 
3303 getfsstat_callback(mount_t mp
, void * arg
) 
3305         struct getfsstat_struct 
*fstp 
= (struct getfsstat_struct 
*)arg
; 
3306         struct vfsstatfs 
*sp
; 
3308         vfs_context_t ctx 
= vfs_context_current(); 
3310         if (fstp
->sfsp 
&& fstp
->count 
< fstp
->maxcount
) { 
3312                 error 
= mac_mount_check_stat(ctx
, mp
); 
3314                         fstp
->error 
= error
; 
3315                         return VFS_RETURNED_DONE
; 
3318                 sp 
= &mp
->mnt_vfsstat
; 
3320                  * If MNT_NOWAIT is specified, do not refresh the 
3321                  * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT. 
3323                 if ((mp
->mnt_lflag 
& MNT_LDEAD
) || 
3324                     (((fstp
->flags 
& MNT_NOWAIT
) == 0 || (fstp
->flags 
& (MNT_WAIT 
| MNT_DWAIT
))) && 
3325                     (!(mp
->mnt_lflag 
& MNT_LUNMOUNT
)) && 
3326                     (error 
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
)))) { 
3327                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
); 
3328                         return VFS_RETURNED
; 
3332                  * Need to handle LP64 version of struct statfs 
3334                 error 
= munge_statfs(mp
, sp
, fstp
->sfsp
, &my_size
, IS_64BIT_PROCESS(vfs_context_proc(ctx
)), FALSE
); 
3336                         fstp
->error 
= error
; 
3337                         return VFS_RETURNED_DONE
; 
3339                 fstp
->sfsp 
+= my_size
; 
3343                         error 
= mac_mount_label_get(mp
, *fstp
->mp
); 
3345                                 fstp
->error 
= error
; 
3346                                 return VFS_RETURNED_DONE
; 
3353         return VFS_RETURNED
; 
3357  * Get statistics on all filesystems. 
3360 getfsstat(__unused proc_t p
, struct getfsstat_args 
*uap
, int *retval
) 
3362         struct __mac_getfsstat_args muap
; 
3364         muap
.buf 
= uap
->buf
; 
3365         muap
.bufsize 
= uap
->bufsize
; 
3366         muap
.mac 
= USER_ADDR_NULL
; 
3368         muap
.flags 
= uap
->flags
; 
3370         return __mac_getfsstat(p
, &muap
, retval
); 
3374  * __mac_getfsstat: Get MAC-related file system statistics 
3376  * Parameters:    p                        (ignored) 
3377  *                uap                      User argument descriptor (see below) 
3378  *                retval                   Count of file system statistics (N stats) 
3380  * Indirect:      uap->bufsize             Buffer size 
3381  *                uap->macsize             MAC info size 
3382  *                uap->buf                 Buffer where information will be returned 
3384  *                uap->flags               File system flags 
3387  * Returns:        0                       Success 
3392 __mac_getfsstat(__unused proc_t p
, struct __mac_getfsstat_args 
*uap
, int *retval
) 
3396         size_t count
, maxcount
, bufsize
, macsize
; 
3397         struct getfsstat_struct fst
; 
3399         if ((unsigned)uap
->bufsize 
> INT_MAX 
|| (unsigned)uap
->macsize 
> INT_MAX
) { 
3403         bufsize 
= (size_t) uap
->bufsize
; 
3404         macsize 
= (size_t) uap
->macsize
; 
3406         if (IS_64BIT_PROCESS(p
)) { 
3407                 maxcount 
= bufsize 
/ sizeof(struct user64_statfs
); 
3409                 maxcount 
= bufsize 
/ sizeof(struct user32_statfs
); 
3417         if (uap
->mac 
!= USER_ADDR_NULL
) { 
3422                 count 
= (macsize 
/ (IS_64BIT_PROCESS(p
) ? 8 : 4)); 
3423                 if (count 
!= maxcount
) { 
3427                 /* Copy in the array */ 
3428                 mp0 
= kheap_alloc(KHEAP_TEMP
, macsize
, Z_WAITOK
); 
3433                 error 
= copyin(uap
->mac
, mp0
, macsize
); 
3435                         kheap_free(KHEAP_TEMP
, mp0
, macsize
); 
3439                 /* Normalize to an array of user_addr_t */ 
3440                 mp 
= kheap_alloc(KHEAP_TEMP
, count 
* sizeof(user_addr_t
), Z_WAITOK
); 
3442                         kheap_free(KHEAP_TEMP
, mp0
, macsize
); 
3446                 for (i 
= 0; i 
< count
; i
++) { 
3447                         if (IS_64BIT_PROCESS(p
)) { 
3448                                 mp
[i
] = ((user_addr_t 
*)mp0
)[i
]; 
3450                                 mp
[i
] = (user_addr_t
)mp0
[i
]; 
3453                 kheap_free(KHEAP_TEMP
, mp0
, macsize
); 
3460         fst
.flags 
= uap
->flags
; 
3463         fst
.maxcount 
= (int)maxcount
; 
3466         vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat_callback
, &fst
); 
3469                 kheap_free(KHEAP_TEMP
, mp
, count 
* sizeof(user_addr_t
)); 
3473                 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
); 
3477         if (fst
.sfsp 
&& fst
.count 
> fst
.maxcount
) { 
3478                 *retval 
= fst
.maxcount
; 
3480                 *retval 
= fst
.count
; 
3486 getfsstat64_callback(mount_t mp
, void * arg
) 
3488         struct getfsstat_struct 
*fstp 
= (struct getfsstat_struct 
*)arg
; 
3489         struct vfsstatfs 
*sp
; 
3490         struct statfs64 sfs
; 
3493         if (fstp
->sfsp 
&& fstp
->count 
< fstp
->maxcount
) { 
3495                 error 
= mac_mount_check_stat(vfs_context_current(), mp
); 
3497                         fstp
->error 
= error
; 
3498                         return VFS_RETURNED_DONE
; 
3501                 sp 
= &mp
->mnt_vfsstat
; 
3503                  * If MNT_NOWAIT is specified, do not refresh the fsstat 
3504                  * cache. MNT_WAIT overrides MNT_NOWAIT. 
3506                  * We treat MNT_DWAIT as MNT_WAIT for all instances of 
3507                  * getfsstat, since the constants are out of the same 
3510                 if ((mp
->mnt_lflag 
& MNT_LDEAD
) || 
3511                     ((((fstp
->flags 
& MNT_NOWAIT
) == 0) || (fstp
->flags 
& (MNT_WAIT 
| MNT_DWAIT
))) && 
3512                     (!(mp
->mnt_lflag 
& MNT_LUNMOUNT
)) && 
3513                     (error 
= vfs_update_vfsstat(mp
, vfs_context_current(), VFS_USER_EVENT
)))) { 
3514                         KAUTH_DEBUG("vfs_update_vfsstat returned %d", error
); 
3515                         return VFS_RETURNED
; 
3518                 vfs_get_statfs64(mp
, &sfs
); 
3519                 error 
= copyout(&sfs
, fstp
->sfsp
, sizeof(sfs
)); 
3521                         fstp
->error 
= error
; 
3522                         return VFS_RETURNED_DONE
; 
3524                 fstp
->sfsp 
+= sizeof(sfs
); 
3527         return VFS_RETURNED
; 
3531  * Get statistics on all file systems in 64 bit mode. 
3534 getfsstat64(__unused proc_t p
, struct getfsstat64_args 
*uap
, int *retval
) 
3537         int count
, maxcount
; 
3538         struct getfsstat_struct fst
; 
3540         maxcount 
= uap
->bufsize 
/ sizeof(struct statfs64
); 
3546         fst
.flags 
= uap
->flags
; 
3549         fst
.maxcount 
= maxcount
; 
3551         vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT
, getfsstat64_callback
, &fst
); 
3554                 KAUTH_DEBUG("ERROR - %s gets %d", p
->p_comm
, fst
.error
); 
3558         if (fst
.sfsp 
&& fst
.count 
> fst
.maxcount
) { 
3559                 *retval 
= fst
.maxcount
; 
3561                 *retval 
= fst
.count
; 
3568  * gets the associated vnode with the file descriptor passed. 
3572  * ctx - vfs context of caller 
3573  * fd - file descriptor for which vnode is required. 
3574  * vpp - Pointer to pointer to vnode to be returned. 
3576  * The vnode is returned with an iocount so any vnode obtained 
3577  * by this call needs a vnode_put 
3581 vnode_getfromfd(vfs_context_t ctx
, int fd
, vnode_t 
*vpp
) 
3585         struct fileproc 
*fp
; 
3586         proc_t p 
= vfs_context_proc(ctx
); 
3590         error 
= fp_getfvp(p
, fd
, &fp
, &vp
); 
3595         error 
= vnode_getwithref(vp
); 
3597                 (void)fp_drop(p
, fd
, fp
, 0); 
3601         (void)fp_drop(p
, fd
, fp
, 0); 
3607  * Wrapper function around namei to start lookup from a directory 
3608  * specified by a file descriptor ni_dirfd. 
3610  * In addition to all the errors returned by namei, this call can 
3611  * return ENOTDIR if the file descriptor does not refer to a directory, 
3612  * and EBADF if the file descriptor is not valid. 
3615 nameiat(struct nameidata 
*ndp
, int dirfd
) 
3617         if ((dirfd 
!= AT_FDCWD
) && 
3618             !(ndp
->ni_flag 
& NAMEI_CONTLOOKUP
) && 
3619             !(ndp
->ni_cnd
.cn_flags 
& USEDVP
)) { 
3623                 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) { 
3624                         error 
= copyin(ndp
->ni_dirp
, &c
, sizeof(char)); 
3629                         c 
= *((char *)(ndp
->ni_dirp
)); 
3635                         error 
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
, 
3641                         if (vnode_vtype(dvp_at
) != VDIR
) { 
3646                         ndp
->ni_dvp 
= dvp_at
; 
3647                         ndp
->ni_cnd
.cn_flags 
|= USEDVP
; 
3649                         ndp
->ni_cnd
.cn_flags 
&= ~USEDVP
; 
3659  * Change current working directory to a given file descriptor. 
3663 common_fchdir(proc_t p
, struct fchdir_args 
*uap
, int per_thread
) 
3665         struct filedesc 
*fdp 
= p
->p_fd
; 
3670         int error
, should_put 
= 1; 
3671         vfs_context_t ctx 
= vfs_context_current(); 
3673         AUDIT_ARG(fd
, uap
->fd
); 
3674         if (per_thread 
&& uap
->fd 
== -1) { 
3676                  * Switching back from per-thread to per process CWD; verify we 
3677                  * in fact have one before proceeding.  The only success case 
3678                  * for this code path is to return 0 preemptively after zapping 
3679                  * the thread structure contents. 
3681                 thread_t th 
= vfs_context_thread(ctx
); 
3683                         uthread_t uth 
= get_bsdthread_info(th
); 
3685                         uth
->uu_cdir 
= NULLVP
; 
3686                         if (tvp 
!= NULLVP
) { 
3694         if ((error 
= file_vnode(uap
->fd
, &vp
))) { 
3697         if ((error 
= vnode_getwithref(vp
))) { 
3702         AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
); 
3704         if (vp
->v_type 
!= VDIR
) { 
3710         error 
= mac_vnode_check_chdir(ctx
, vp
); 
3715         error 
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
); 
3720         while (!error 
&& (mp 
= vp
->v_mountedhere
) != NULL
) { 
3721                 if (vfs_busy(mp
, LK_NOWAIT
)) { 
3725                 error 
= VFS_ROOT(mp
, &tdp
, ctx
); 
3736         if ((error 
= vnode_ref(vp
))) { 
3743                 thread_t th 
= vfs_context_thread(ctx
); 
3745                         uthread_t uth 
= get_bsdthread_info(th
); 
3748                         OSBitOrAtomic(P_THCWD
, &p
->p_flag
); 
3755                 proc_dirs_lock_exclusive(p
); 
3760                 proc_dirs_unlock_exclusive(p
); 
3777 fchdir(proc_t p
, struct fchdir_args 
*uap
, __unused 
int32_t *retval
) 
3779         return common_fchdir(p
, uap
, 0); 
3783 __pthread_fchdir(proc_t p
, struct __pthread_fchdir_args 
*uap
, __unused 
int32_t *retval
) 
3785         return common_fchdir(p
, (void *)uap
, 1); 
3790  * Change current working directory ("."). 
3792  * Returns:     0                       Success 
3793  *      change_dir:ENOTDIR 
3795  *      vnode_ref:ENOENT                No such file or directory 
3799 chdir_internal(proc_t p
, vfs_context_t ctx
, struct nameidata 
*ndp
, int per_thread
) 
3801         struct filedesc 
*fdp 
= p
->p_fd
; 
3805         error 
= change_dir(ndp
, ctx
); 
3809         if ((error 
= vnode_ref(ndp
->ni_vp
))) { 
3810                 vnode_put(ndp
->ni_vp
); 
3814          * drop the iocount we picked up in change_dir 
3816         vnode_put(ndp
->ni_vp
); 
3819                 thread_t th 
= vfs_context_thread(ctx
); 
3821                         uthread_t uth 
= get_bsdthread_info(th
); 
3823                         uth
->uu_cdir 
= ndp
->ni_vp
; 
3824                         OSBitOrAtomic(P_THCWD
, &p
->p_flag
); 
3826                         vnode_rele(ndp
->ni_vp
); 
3830                 proc_dirs_lock_exclusive(p
); 
3833                 fdp
->fd_cdir 
= ndp
->ni_vp
; 
3835                 proc_dirs_unlock_exclusive(p
); 
3847  * Change current working directory ("."). 
3849  * Returns:     0                       Success 
3850  *      chdir_internal:ENOTDIR 
3851  *      chdir_internal:ENOENT           No such file or directory 
3852  *      chdir_internal:??? 
3856 common_chdir(proc_t p
, struct chdir_args 
*uap
, int per_thread
) 
3858         struct nameidata nd
; 
3859         vfs_context_t ctx 
= vfs_context_current(); 
3861         NDINIT(&nd
, LOOKUP
, OP_CHDIR
, FOLLOW 
| AUDITVNPATH1
, 
3862             UIO_USERSPACE
, uap
->path
, ctx
); 
3864         return chdir_internal(p
, ctx
, &nd
, per_thread
); 
3871  * Change current working directory (".") for the entire process 
3873  * Parameters:  p       Process requesting the call 
3874  *              uap     User argument descriptor (see below) 
3877  * Indirect parameters: uap->path       Directory path 
3879  * Returns:     0                       Success 
3880  *              common_chdir: ENOTDIR 
3881  *              common_chdir: ENOENT    No such file or directory 
3886 chdir(proc_t p
, struct chdir_args 
*uap
, __unused 
int32_t *retval
) 
3888         return common_chdir(p
, (void *)uap
, 0); 
3894  * Change current working directory (".") for a single thread 
3896  * Parameters:  p       Process requesting the call 
3897  *              uap     User argument descriptor (see below) 
3900  * Indirect parameters: uap->path       Directory path 
3902  * Returns:     0                       Success 
3903  *              common_chdir: ENOTDIR 
3904  *              common_chdir: ENOENT    No such file or directory 
3909 __pthread_chdir(proc_t p
, struct __pthread_chdir_args 
*uap
, __unused 
int32_t *retval
) 
3911         return common_chdir(p
, (void *)uap
, 1); 
3916  * Change notion of root (``/'') directory. 
3920 chroot(proc_t p
, struct chroot_args 
*uap
, __unused 
int32_t *retval
) 
3922         struct filedesc 
*fdp 
= p
->p_fd
; 
3924         struct nameidata nd
; 
3926         vfs_context_t ctx 
= vfs_context_current(); 
3928         if ((error 
= suser(kauth_cred_get(), &p
->p_acflag
))) { 
3932         NDINIT(&nd
, LOOKUP
, OP_CHROOT
, FOLLOW 
| AUDITVNPATH1
, 
3933             UIO_USERSPACE
, uap
->path
, ctx
); 
3934         error 
= change_dir(&nd
, ctx
); 
3940         error 
= mac_vnode_check_chroot(ctx
, nd
.ni_vp
, 
3943                 vnode_put(nd
.ni_vp
); 
3948         if ((error 
= vnode_ref(nd
.ni_vp
))) { 
3949                 vnode_put(nd
.ni_vp
); 
3952         vnode_put(nd
.ni_vp
); 
3955          * This lock provides the guarantee that as long as you hold the lock 
3956          * fdp->fd_rdir has a usecount on it. This is used to take an iocount 
3957          * on a referenced vnode in namei when determining the rootvnode for 
3960         /* needed for synchronization with lookup */ 
3961         proc_dirs_lock_exclusive(p
); 
3962         /* needed for setting the flag and other activities on the fd itself */ 
3965         fdp
->fd_rdir 
= nd
.ni_vp
; 
3966         fdp
->fd_flags 
|= FD_CHROOT
; 
3968         proc_dirs_unlock_exclusive(p
); 
3977 #define PATHSTATICBUFLEN 256 
3978 #define PIVOT_ROOT_ENTITLEMENT              \ 
3979        "com.apple.private.vfs.pivot-root" 
3981 #if defined(XNU_TARGET_OS_OSX) 
3983 pivot_root(proc_t p
, struct pivot_root_args 
*uap
, __unused 
int *retval
) 
3986         char new_rootfs_path_before
[PATHSTATICBUFLEN
] = {0}; 
3987         char old_rootfs_path_after
[PATHSTATICBUFLEN
] = {0}; 
3988         char *new_rootfs_path_before_buf 
= NULL
; 
3989         char *old_rootfs_path_after_buf 
= NULL
; 
3990         char *incoming 
= NULL
; 
3991         char *outgoing 
= NULL
; 
3992         vnode_t incoming_rootvp 
= NULLVP
; 
3993         size_t bytes_copied
; 
3996          * XXX : Additional restrictions needed 
3997          * - perhaps callable only once. 
3999         if ((error 
= suser(kauth_cred_get(), &p
->p_acflag
))) { 
4004          * pivot_root can be executed by launchd only. 
4005          * Enforce entitlement. 
4007         if ((p
->p_pid 
!= 1) || !IOTaskHasEntitlement(current_task(), PIVOT_ROOT_ENTITLEMENT
)) { 
4011         error 
= copyinstr(uap
->new_rootfs_path_before
, &new_rootfs_path_before
[0], PATHSTATICBUFLEN
, &bytes_copied
); 
4012         if (error 
== ENAMETOOLONG
) { 
4013                 new_rootfs_path_before_buf 
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
); 
4014                 error 
= copyinstr(uap
->new_rootfs_path_before
, new_rootfs_path_before_buf
, MAXPATHLEN
, &bytes_copied
); 
4021         error 
= copyinstr(uap
->old_rootfs_path_after
, &old_rootfs_path_after
[0], PATHSTATICBUFLEN
, &bytes_copied
); 
4022         if (error 
== ENAMETOOLONG
) { 
4023                 old_rootfs_path_after_buf 
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
); 
4024                 error 
= copyinstr(uap
->old_rootfs_path_after
, old_rootfs_path_after_buf
, MAXPATHLEN
, &bytes_copied
); 
4030         if (new_rootfs_path_before_buf
) { 
4031                 incoming 
= new_rootfs_path_before_buf
; 
4033                 incoming 
= &new_rootfs_path_before
[0]; 
4036         if (old_rootfs_path_after_buf
) { 
4037                 outgoing 
= old_rootfs_path_after_buf
; 
4039                 outgoing 
= &old_rootfs_path_after
[0]; 
4043          * The proposed incoming FS MUST be authenticated (i.e. not a chunklist DMG). 
4044          * Userland is not allowed to pivot to an image. 
4046         error 
= vnode_lookup(incoming
, 0, &incoming_rootvp
, vfs_context_kernel()); 
4050         error 
= VNOP_IOCTL(incoming_rootvp
, FSIOC_KERNEL_ROOTAUTH
, NULL
, 0, vfs_context_kernel()); 
4055         error 
= vfs_switch_root(incoming
, outgoing
, VFSSR_VIRTUALDEV_PROHIBITED
); 
4058         if (incoming_rootvp 
!= NULLVP
) { 
4059                 vnode_put(incoming_rootvp
); 
4060                 incoming_rootvp 
= NULLVP
; 
4063         if (old_rootfs_path_after_buf
) { 
4064                 zfree(ZV_NAMEI
, old_rootfs_path_after_buf
); 
4067         if (new_rootfs_path_before_buf
) { 
4068                 zfree(ZV_NAMEI
, new_rootfs_path_before_buf
); 
4075 pivot_root(proc_t p
, __unused 
struct pivot_root_args 
*uap
, int *retval
) 
4077         return nosys(p
, NULL
, retval
); 
4079 #endif /* XNU_TARGET_OS_OSX */ 
4082  * Common routine for chroot and chdir. 
4084  * Returns:     0                       Success 
4085  *              ENOTDIR                 Not a directory 
4086  *              namei:???               [anything namei can return] 
4087  *              vnode_authorize:???     [anything vnode_authorize can return] 
4090 change_dir(struct nameidata 
*ndp
, vfs_context_t ctx
) 
4095         if ((error 
= namei(ndp
))) { 
4101         if (vp
->v_type 
!= VDIR
) { 
4107         error 
= mac_vnode_check_chdir(ctx
, vp
); 
4114         error 
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_SEARCH
, ctx
); 
4124  * Allocate the vnode data (for directories) associated with the file glob. 
4127 fg_vn_data_alloc(void) 
4129         struct fd_vn_data 
*fvdata
; 
4131         /* Allocate per fd vnode data */ 
4132         fvdata 
= kheap_alloc(KM_FD_VN_DATA
, sizeof(struct fd_vn_data
), 
4134         lck_mtx_init(&fvdata
->fv_lock
, &fd_vn_lck_grp
, &fd_vn_lck_attr
); 
4139  * Free the vnode data (for directories) associated with the file glob. 
4142 fg_vn_data_free(void *fgvndata
) 
4144         struct fd_vn_data 
*fvdata 
= (struct fd_vn_data 
*)fgvndata
; 
4146         kheap_free(KHEAP_DATA_BUFFERS
, fvdata
->fv_buf
, fvdata
->fv_bufallocsiz
); 
4147         lck_mtx_destroy(&fvdata
->fv_lock
, &fd_vn_lck_grp
); 
4148         kheap_free(KM_FD_VN_DATA
, fvdata
, sizeof(struct fd_vn_data
)); 
4152  * Check permissions, allocate an open file structure, 
4153  * and call the device open routine if any. 
4155  * Returns:     0                       Success 
4166  * XXX Need to implement uid, gid 
4169 open1(vfs_context_t ctx
, struct nameidata 
*ndp
, int uflags
, 
4170     struct vnode_attr 
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, 
4173         proc_t p 
= vfs_context_proc(ctx
); 
4174         uthread_t uu 
= get_bsdthread_info(vfs_context_thread(ctx
)); 
4175         struct fileproc 
*fp
; 
4178         int type
, indx
, error
; 
4179         struct vfs_context context
; 
4183         if ((oflags 
& O_ACCMODE
) == O_ACCMODE
) { 
4187         flags 
= FFLAGS(uflags
); 
4188         CLR(flags
, FENCRYPTED
); 
4189         CLR(flags
, FUNENCRYPTED
); 
4191         AUDIT_ARG(fflags
, oflags
); 
4192         AUDIT_ARG(mode
, vap
->va_mode
); 
4194         if ((error 
= falloc_withalloc(p
, 
4195             &fp
, &indx
, ctx
, fp_zalloc
, cra
)) != 0) { 
4198         uu
->uu_dupfd 
= -indx 
- 1; 
4200         if ((error 
= vn_open_auth(ndp
, &flags
, vap
))) { 
4201                 if ((error 
== ENODEV 
|| error 
== ENXIO
) && (uu
->uu_dupfd 
>= 0)) {       /* XXX from fdopen */ 
4202                         if ((error 
= dupfdopen(p
->p_fd
, indx
, uu
->uu_dupfd
, flags
, error
)) == 0) { 
4203                                 fp_drop(p
, indx
, NULL
, 0); 
4208                 if (error 
== ERESTART
) { 
4211                 fp_free(p
, indx
, fp
); 
4217         fp
->fp_glob
->fg_flag 
= flags 
& (FMASK 
| O_EVTONLY 
| FENCRYPTED 
| FUNENCRYPTED
); 
4218         fp
->fp_glob
->fg_ops 
= &vnops
; 
4219         fp
->fp_glob
->fg_data 
= (caddr_t
)vp
; 
4221         if (flags 
& (O_EXLOCK 
| O_SHLOCK
)) { 
4223                         .l_whence 
= SEEK_SET
, 
4226                 if (flags 
& O_EXLOCK
) { 
4227                         lf
.l_type 
= F_WRLCK
; 
4229                         lf
.l_type 
= F_RDLCK
; 
4232                 if ((flags 
& FNONBLOCK
) == 0) { 
4236                 error 
= mac_file_check_lock(vfs_context_ucred(ctx
), fp
->fp_glob
, 
4242                 if ((error 
= VNOP_ADVLOCK(vp
, (caddr_t
)fp
->fp_glob
, F_SETLK
, &lf
, type
, ctx
, NULL
))) { 
4245                 fp
->fp_glob
->fg_flag 
|= FWASLOCKED
; 
4248         /* try to truncate by setting the size attribute */ 
4249         if ((flags 
& O_TRUNC
) && ((error 
= vnode_setsize(vp
, (off_t
)0, 0, ctx
)) != 0)) { 
4254          * For directories we hold some additional information in the fd. 
4256         if (vnode_vtype(vp
) == VDIR
) { 
4257                 fp
->fp_glob
->fg_vn_data 
= fg_vn_data_alloc(); 
4259                 fp
->fp_glob
->fg_vn_data 
= NULL
; 
4265          * The first terminal open (without a O_NOCTTY) by a session leader 
4266          * results in it being set as the controlling terminal. 
4268         if (vnode_istty(vp
) && !(p
->p_flag 
& P_CONTROLT
) && 
4269             !(flags 
& O_NOCTTY
)) { 
4272                 (void)(*fp
->fp_glob
->fg_ops
->fo_ioctl
)(fp
, (int)TIOCSCTTY
, 
4273                     (caddr_t
)&tmp
, ctx
); 
4277         if (flags 
& O_CLOEXEC
) { 
4278                 *fdflags(p
, indx
) |= UF_EXCLOSE
; 
4280         if (flags 
& O_CLOFORK
) { 
4281                 *fdflags(p
, indx
) |= UF_FORKCLOSE
; 
4283         procfdtbl_releasefd(p
, indx
, NULL
); 
4285 #if CONFIG_SECLUDED_MEMORY 
4286         if (secluded_for_filecache 
&& 
4287             FILEGLOB_DTYPE(fp
->fp_glob
) == DTYPE_VNODE 
&& 
4288             vnode_vtype(vp
) == VREG
) { 
4289                 memory_object_control_t moc
; 
4291                 moc 
= ubc_getobject(vp
, UBC_FLAGS_NONE
); 
4293                 if (moc 
== MEMORY_OBJECT_CONTROL_NULL
) { 
4294                         /* nothing to do... */ 
4295                 } else if (fp
->fp_glob
->fg_flag 
& FWRITE
) { 
4296                         /* writable -> no longer  eligible for secluded pages */ 
4297                         memory_object_mark_eligible_for_secluded(moc
, 
4299                 } else if (secluded_for_filecache 
== 1) { 
4300                         char pathname
[32] = { 0, }; 
4302                         /* XXX FBDP: better way to detect /Applications/ ? */ 
4303                         if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) { 
4304                                 (void)copyinstr(ndp
->ni_dirp
, 
4309                                 copystr(CAST_DOWN(void *, ndp
->ni_dirp
), 
4314                         pathname
[sizeof(pathname
) - 1] = '\0'; 
4315                         if (strncmp(pathname
, 
4317                             strlen("/Applications/")) == 0 && 
4319                             "/Applications/Camera.app/", 
4320                             strlen("/Applications/Camera.app/")) != 0) { 
4323                                  * AND from "/Applications/" 
4324                                  * AND not from "/Applications/Camera.app/" 
4325                                  * ==> eligible for secluded 
4327                                 memory_object_mark_eligible_for_secluded(moc
, 
4330                 } else if (secluded_for_filecache 
== 2) { 
4332 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64" 
4334 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7" 
4336 /* not implemented... */ 
4338                         size_t len 
= strlen(vp
->v_name
); 
4339                         if (!strncmp(vp
->v_name
, DYLD_SHARED_CACHE_NAME
, len
) || 
4340                             !strncmp(vp
->v_name
, "dyld", len
) || 
4341                             !strncmp(vp
->v_name
, "launchd", len
) || 
4342                             !strncmp(vp
->v_name
, "Camera", len
) || 
4343                             !strncmp(vp
->v_name
, "mediaserverd", len
) || 
4344                             !strncmp(vp
->v_name
, "SpringBoard", len
) || 
4345                             !strncmp(vp
->v_name
, "backboardd", len
)) { 
4347                                  * This file matters when launching Camera: 
4348                                  * do not store its contents in the secluded 
4349                                  * pool that will be drained on Camera launch. 
4351                                 memory_object_mark_eligible_for_secluded(moc
, 
4356 #endif /* CONFIG_SECLUDED_MEMORY */ 
4358         fp_drop(p
, indx
, fp
, 1); 
4365         context 
= *vfs_context_current(); 
4366         context
.vc_ucred 
= fp
->fp_glob
->fg_cred
; 
4368         if ((fp
->fp_glob
->fg_flag 
& FWASLOCKED
) && 
4369             (FILEGLOB_DTYPE(fp
->fp_glob
) == DTYPE_VNODE
)) { 
4371                         .l_whence 
= SEEK_SET
, 
4376                         vp
, (caddr_t
)fp
->fp_glob
, F_UNLCK
, &lf
, F_FLOCK
, ctx
, NULL
); 
4379         vn_close(vp
, fp
->fp_glob
->fg_flag
, &context
); 
4381         fp_free(p
, indx
, fp
); 
4387  * While most of the *at syscall handlers can call nameiat() which 
4388  * is a wrapper around namei, the use of namei and initialisation 
4389  * of nameidata are far removed and in different functions  - namei 
4390  * gets called in vn_open_auth for open1. So we'll just do here what 
4394 open1at(vfs_context_t ctx
, struct nameidata 
*ndp
, int uflags
, 
4395     struct vnode_attr 
*vap
, fp_allocfn_t fp_zalloc
, void *cra
, int32_t *retval
, 
4398         if ((dirfd 
!= AT_FDCWD
) && !(ndp
->ni_cnd
.cn_flags 
& USEDVP
)) { 
4402                 if (UIO_SEG_IS_USER_SPACE(ndp
->ni_segflg
)) { 
4403                         error 
= copyin(ndp
->ni_dirp
, &c
, sizeof(char)); 
4408                         c 
= *((char *)(ndp
->ni_dirp
)); 
4414                         error 
= vnode_getfromfd(ndp
->ni_cnd
.cn_context
, dirfd
, 
4420                         if (vnode_vtype(dvp_at
) != VDIR
) { 
4425                         ndp
->ni_dvp 
= dvp_at
; 
4426                         ndp
->ni_cnd
.cn_flags 
|= USEDVP
; 
4427                         error 
= open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, 
4434         return open1(ctx
, ndp
, uflags
, vap
, fp_zalloc
, cra
, retval
); 
4438  * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)). 
4440  * Parameters:  p                       Process requesting the open 
4441  *              uap                     User argument descriptor (see below) 
4442  *              retval                  Pointer to an area to receive the 
4443  *                                      return value from the system call 
4445  * Indirect:    uap->path               Path to open (same as 'open') 
4446  *              uap->flags              Flags to open (same as 'open' 
4447  *              uap->uid                UID to set, if creating 
4448  *              uap->gid                GID to set, if creating 
4449  *              uap->mode               File mode, if creating (same as 'open') 
4450  *              uap->xsecurity          ACL to set, if creating 
4452  * Returns:     0                       Success 
4455  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order. 
4457  * XXX:         We should enumerate the possible errno values here, and where 
4458  *              in the code they originated. 
4461 open_extended(proc_t p
, struct open_extended_args 
*uap
, int32_t *retval
) 
4463         struct filedesc 
*fdp 
= p
->p_fd
; 
4465         kauth_filesec_t xsecdst
; 
4466         struct vnode_attr va
; 
4467         struct nameidata nd
; 
4470         AUDIT_ARG(owner
, uap
->uid
, uap
->gid
); 
4473         if ((uap
->xsecurity 
!= USER_ADDR_NULL
) && 
4474             ((ciferror 
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) { 
4479         cmode 
= ((uap
->mode 
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
; 
4480         VATTR_SET(&va
, va_mode
, cmode 
& ACCESSPERMS
); 
4481         if (uap
->uid 
!= KAUTH_UID_NONE
) { 
4482                 VATTR_SET(&va
, va_uid
, uap
->uid
); 
4484         if (uap
->gid 
!= KAUTH_GID_NONE
) { 
4485                 VATTR_SET(&va
, va_gid
, uap
->gid
); 
4487         if (xsecdst 
!= NULL
) { 
4488                 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
); 
4491         NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW 
| AUDITVNPATH1
, UIO_USERSPACE
, 
4492             uap
->path
, vfs_context_current()); 
4494         ciferror 
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
, 
4495             fileproc_alloc_init
, NULL
, retval
); 
4496         if (xsecdst 
!= NULL
) { 
4497                 kauth_filesec_free(xsecdst
); 
4504  * Go through the data-protected atomically controlled open (2) 
4506  * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode) 
// open_dprotected_np: open(2) variant that additionally takes a data-protection
// class (uap->class) and data-protection flags (uap->dpflags) and passes them
// to open1() through the vnode_attr.
// NOTE(review): p is tagged __unused in the signature but is dereferenced
// below (p->p_fd).
// NOTE(review): gaps in the embedded line numbers mean some original lines are
// not visible in this listing.
4509 open_dprotected_np(__unused proc_t p
, struct open_dprotected_np_args 
*uap
, int32_t *retval
) 
             // Local copies of the user-supplied arguments.
4511         int flags 
= uap
->flags
; 
4512         int class = uap
->class; 
4513         int dpflags 
= uap
->dpflags
; 
4516          * Follow the same path as normal open(2) 
4517          * Look up the item if it exists, and acquire the vnode. 
4519         struct filedesc 
*fdp 
= p
->p_fd
; 
4520         struct vnode_attr va
; 
4521         struct nameidata nd
; 
4526         /* Mask off all but regular access permissions */ 
4527         cmode 
= ((uap
->mode 
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
; 
4528         VATTR_SET(&va
, va_mode
, cmode 
& ACCESSPERMS
); 
4530         NDINIT(&nd
, LOOKUP
, OP_OPEN
, FOLLOW 
| AUDITVNPATH1
, UIO_USERSPACE
, 
4531             uap
->path
, vfs_context_current()); 
4534          * Initialize the extra fields in vnode_attr to pass down our 
4536          * 1. target cprotect class. 
4537          * 2. set a flag to mark it as requiring open-raw-encrypted semantics. 
             // The class is only forwarded when the open may create the file.
4539         if (flags 
& O_CREAT
) { 
4540                 /* lower level kernel code validates that the class is valid before applying it. */ 
4541                 if (class != PROTECTION_CLASS_DEFAULT
) { 
4543                          * PROTECTION_CLASS_DEFAULT implies that we make the class for this 
4544                          * file behave the same as open (2) 
4546                         VATTR_SET(&va
, va_dataprotect_class
, class); 
             // Raw access requests: a test on O_RDWR | O_WRONLY comes first (its
             // body is not visible here), then the matching VA_DP_* flag is set.
4550         if (dpflags 
& (O_DP_GETRAWENCRYPTED 
| O_DP_GETRAWUNENCRYPTED
)) { 
4551                 if (flags 
& (O_RDWR 
| O_WRONLY
)) { 
4552                         /* Not allowed to write raw encrypted bytes */ 
                     // NOTE(review): the two tests below re-read uap->dpflags although
                     // the local copy dpflags is used just above.  Both write the same
                     // va_dataprotect_flags field; if VATTR_SET is a plain assignment,
                     // a request with both bits set keeps only the second - confirm.
4555                 if (uap
->dpflags 
& O_DP_GETRAWENCRYPTED
) { 
4556                         VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWENCRYPTED
); 
4558                 if (uap
->dpflags 
& O_DP_GETRAWUNENCRYPTED
) { 
4559                         VATTR_SET(&va
, va_dataprotect_flags
, VA_DP_RAWUNENCRYPTED
); 
             // open1() is given uap->flags directly rather than the local copy.
4563         error 
= open1(vfs_context_current(), &nd
, uap
->flags
, &va
, 
4564             fileproc_alloc_init
, NULL
, retval
); 
// openat_internal: common open path taking an explicit context, path address,
// flags, mode, directory fd and address-space selector (segflg).  Within this
// listing it is called by open_nocancel(), openat_nocancel() and openbyid_np().
// NOTE(review): gaps in the embedded line numbers mean some original lines
// (including the tail of the NDINIT and open1at argument lists) are not visible.
4570 openat_internal(vfs_context_t ctx
, user_addr_t path
, int flags
, int mode
, 
4571     int fd
, enum uio_seg segflg
, int *retval
) 
             // File-descriptor table of the process behind ctx; supplies fd_cmask.
4573         struct filedesc 
*fdp 
= (vfs_context_proc(ctx
))->p_fd
; 
             // va and nd are declared at deeper indentation than the other locals;
             // they are reached below as __open_data->va and __open_data->nd.
4575                 struct vnode_attr va
; 
4576                 struct nameidata nd
; 
4578         struct vnode_attr 
*vap
; 
4579         struct nameidata 
*ndp
; 
             // The vnode_attr and nameidata live in a KHEAP_TEMP allocation rather
             // than directly in locals; vap and ndp point at its two members.
4583         __open_data 
= kheap_alloc(KHEAP_TEMP
, sizeof(*__open_data
), Z_WAITOK
); 
4584         vap 
= &__open_data
->va
; 
4585         ndp 
= &__open_data
->nd
; 
4588         /* Mask off all but regular access permissions */ 
4589         cmode 
= ((mode 
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
; 
4590         VATTR_SET(vap
, va_mode
, cmode 
& ACCESSPERMS
); 
4592         NDINIT(ndp
, LOOKUP
, OP_OPEN
, FOLLOW 
| AUDITVNPATH1
, 
4595         error 
= open1at(ctx
, ndp
, flags
, vap
, fileproc_alloc_init
, NULL
, 
             // The temporary allocation is released with the same size it was
             // allocated with.
4598         kheap_free(KHEAP_TEMP
, __open_data
, sizeof(*__open_data
)); 
// open: calls __pthread_testcancel(1) and then forwards to open_nocancel(),
// passing uap reinterpreted as struct open_nocancel_args.
4604 open(proc_t p
, struct open_args 
*uap
, int32_t *retval
) 
4606         __pthread_testcancel(1); 
4607         return open_nocancel(p
, (struct open_nocancel_args 
*)uap
, retval
); 
// open_nocancel: forwards to openat_internal() with the current VFS context,
// AT_FDCWD as the directory fd and a user-space path (UIO_USERSPACE).
// NOTE(review): the last line of the parameter list is not visible here.
4611 open_nocancel(__unused proc_t p
, struct open_nocancel_args 
*uap
, 
4614         return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
, 
4615                    uap
->mode
, AT_FDCWD
, UIO_USERSPACE
, retval
); 
// openat_nocancel: same forwarding as open_nocancel(), except that the
// directory fd comes from the caller (uap->fd) instead of AT_FDCWD.
// NOTE(review): the last line of the parameter list is not visible here.
4619 openat_nocancel(__unused proc_t p
, struct openat_nocancel_args 
*uap
, 
4622         return openat_internal(vfs_context_current(), uap
->path
, uap
->flags
, 
4623                    uap
->mode
, uap
->fd
, UIO_USERSPACE
, retval
); 
// openat: calls __pthread_testcancel(1) and then forwards to openat_nocancel(),
// passing uap reinterpreted as struct openat_nocancel_args.
4627 openat(proc_t p
, struct openat_args 
*uap
, int32_t *retval
) 
4629         __pthread_testcancel(1); 
4630         return openat_nocancel(p
, (struct openat_nocancel_args 
*)uap
, retval
); 
4634  * openbyid_np: open a file given a file system id and a file system object id 
4635  *      the hfs file system object id is an fsobj_id_t {uint32, uint32} 
4636  *      on file systems that don't support object ids, it is a node id (uint64_t). 
4638  * Parameters:  p                       Process requesting the open 
4639  *              uap                     User argument descriptor (see below) 
4640  *              retval                  Pointer to an area to receive the 
4641  *                                      return value from the system call 
4643  * Indirect:    uap->path               Path to open (same as 'open') 
4645  *              uap->fsid               id of target file system 
4646  *              uap->objid              id of target file system object 
4647  *              uap->oflags             Flags to open (same as 'open') 
4649  * Returns:     0                       Success 
4653  * XXX:         We should enumerate the possible errno values here, and where 
4654  *              in the code they originated. 
// openbyid_np: resolves a (file system id, object id) pair to a path with
// fsgetpath_internal() and opens that path through openat_internal().
// NOTE(review): gaps in the embedded line numbers mean some original lines
// (local declarations, branch bodies, the loop opener) are not visible here.
4657 openbyid_np(__unused proc_t p
, struct openbyid_np_args 
*uap
, int *retval
) 
             // Path buffer size; starts at MAXPATHLEN and grows in the retry loop.
4663         int buflen 
= MAXPATHLEN
; 
4665         vfs_context_t ctx 
= vfs_context_current(); 
             // The caller's credential is checked for PRIV_VFS_OPEN_BY_ID first.
4667         if ((error 
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_OPEN_BY_ID
, 0))) { 
             // Both identifiers are copied in from user space.
4671         if ((error 
= copyin(uap
->fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) { 
4675         /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */ 
4676         if ((error 
= copyin(uap
->objid
, (caddr_t
)&objid
, sizeof(uint64_t)))) { 
4680         AUDIT_ARG(value32
, fsid
.val
[0]); 
4681         AUDIT_ARG(value64
, objid
); 
4683         /* resolve path from fsid, objid */ 
             // The buffer is allocated with one byte more than buflen.
4685                 buf 
= kheap_alloc(KHEAP_TEMP
, buflen 
+ 1, Z_WAITOK
); 
4690                 error 
= fsgetpath_internal( ctx
, fsid
.val
[0], objid
, buflen
, 
4691                     buf
, FSOPT_ISREALFSID
, &pathlen
); 
                     // This release sits inside a conditional whose opening line is not
                     // visible in this listing.
4694                         kheap_free(KHEAP_TEMP
, buf
, buflen 
+ 1); 
             // Retry only on ENOSPC; the second operand enlarges buflen by
             // MAXPATHLEN as a side effect before the next attempt.
4697         } while (error 
== ENOSPC 
&& (buflen 
+= MAXPATHLEN
)); 
             // The resolved path is a kernel buffer, hence UIO_SYSSPACE; mode is 0
             // and the lookup is relative to AT_FDCWD.
4705         error 
= openat_internal( 
4706                 ctx
, (user_addr_t
)buf
, uap
->oflags
, 0, AT_FDCWD
, UIO_SYSSPACE
, retval
); 
4708         kheap_free(KHEAP_TEMP
, buf
, buflen 
+ 1); 
4715  * Create a special file. 
4717 static int mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr 
*vap
); 
// mknod: creates a special file at uap->path.  A FIFO request is delegated to
// mkfifo1(); for other types the visible code sets VCHR or VBLK, runs the
// privilege, MAC and kauth checks, and calls vn_create().
// NOTE(review): gaps in the embedded line numbers mean some original lines
// (case labels, branch bodies, cleanup) are not visible in this listing.
4720 mknod(proc_t p
, struct mknod_args 
*uap
, __unused 
int32_t *retval
) 
4722         struct vnode_attr va
; 
4723         vfs_context_t ctx 
= vfs_context_current(); 
4725         struct nameidata nd
; 
             // Requested ALLPERMS bits with the process creation mask removed,
             // plus the device number supplied by the caller.
4729         VATTR_SET(&va
, va_mode
, (uap
->mode 
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
); 
4730         VATTR_SET(&va
, va_rdev
, uap
->dev
); 
4732         /* If it's a mknod() of a FIFO, call mkfifo1() instead */ 
4733         if ((uap
->mode 
& S_IFMT
) == S_IFIFO
) { 
4734                 return mkfifo1(ctx
, uap
->path
, &va
); 
4737         AUDIT_ARG(mode
, (mode_t
)uap
->mode
); 
4738         AUDIT_ARG(value32
, uap
->dev
); 
             // Non-FIFO nodes go through a suser() check on the caller's credential.
4740         if ((error 
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) { 
             // CREATE lookup on the user-space path with LOCKPARENT set.
4743         NDINIT(&nd
, CREATE
, OP_MKNOD
, LOCKPARENT 
| AUDITVNPATH1
, 
4744             UIO_USERSPACE
, uap
->path
, ctx
); 
             // The file-type bits select the vnode type; the case labels are not
             // visible here, only the two assignments.
4757         switch (uap
->mode 
& S_IFMT
) { 
4759                 VATTR_SET(&va
, va_type
, VCHR
); 
4762                 VATTR_SET(&va
, va_type
, VBLK
); 
4770         error 
= mac_vnode_check_create(ctx
, 
4771             nd
.ni_dvp
, &nd
.ni_cnd
, &va
); 
             // Adding an entry to the parent directory must be authorized.
4777         if ((error 
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) { 
4781         if ((error 
= vn_create(dvp
, &vp
, &nd
, &va
, 0, 0, NULL
, ctx
)) != 0) { 
4786                 int     update_flags 
= 0; 
4788                 // Make sure the name & parent pointers are hooked up 
4789                 if (vp
->v_name 
== NULL
) { 
4790                         update_flags 
|= VNODE_UPDATE_NAME
; 
4792                 if (vp
->v_parent 
== NULLVP
) { 
4793                         update_flags 
|= VNODE_UPDATE_PARENT
; 
4797                         vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
); 
                     // An FSE_CREATE_FILE event is posted; the rest of its argument
                     // list is not visible in this listing.
4801                 add_fsevent(FSE_CREATE_FILE
, ctx
, 
4809          * nameidone has to happen before we vnode_put(dvp) 
4810          * since it may need to release the fs_nodelock on the dvp 
4823  * Create a named pipe. 
4825  * Returns:     0                       Success 
4828  *      vnode_authorize:??? 
// mkfifo1: shared named-pipe creation helper.  Takes a caller-prepared
// vnode_attr (vap), forces its type to VFIFO, authorizes the create and calls
// vn_create().  Called in this listing by mknod(), mkfifo_extended() and mkfifo().
// NOTE(review): gaps in the embedded line numbers mean some original lines
// (the lookup call, branch bodies, cleanup) are not visible in this listing.
4832 mkfifo1(vfs_context_t ctx
, user_addr_t upath
, struct vnode_attr 
*vap
) 
4836         struct nameidata nd
; 
             // CREATE lookup on the user-space path with LOCKPARENT set.
4838         NDINIT(&nd
, CREATE
, OP_MKFIFO
, LOCKPARENT 
| AUDITVNPATH1
, 
4839             UIO_USERSPACE
, upath
, ctx
); 
4847         /* check that this is a new file and authorize addition */ 
4852         VATTR_SET(vap
, va_type
, VFIFO
); 
4854         if ((error 
= vn_authorize_create(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) { 
4858         error 
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
); 
4861          * nameidone has to happen before we vnode_put(dvp) 
4862          * since it may need to release the fs_nodelock on the dvp 
4876  * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)). 
4878  * Parameters:  p                       Process requesting the open 
4879  *              uap                     User argument descriptor (see below) 
4882  * Indirect:    uap->path               Path to fifo (same as 'mkfifo') 
4883  *              uap->uid                UID to set 
4884  *              uap->gid                GID to set 
4885  *              uap->mode               File mode to set (same as 'mkfifo') 
4886  *              uap->xsecurity          ACL to set, if creating 
4888  * Returns:     0                       Success 
4891  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order. 
4893  * XXX:         We should enumerate the possible errno values here, and where 
4894  *              in the code they originated. 
// mkfifo_extended: named-pipe creation with optional owner uid/gid and an
// optional ACL taken from a user-supplied filesec; the actual creation is
// delegated to mkfifo1().
// NOTE(review): gaps in the embedded line numbers mean some original lines
// (branch bodies, VATTR setup, the return) are not visible in this listing.
4897 mkfifo_extended(proc_t p
, struct mkfifo_extended_args 
*uap
, __unused 
int32_t *retval
) 
4900         kauth_filesec_t xsecdst
; 
4901         struct vnode_attr va
; 
4903         AUDIT_ARG(owner
, uap
->uid
, uap
->gid
); 
             // Start with no filesec; it is replaced only if the caller supplied one.
4905         xsecdst 
= KAUTH_FILESEC_NONE
; 
4906         if (uap
->xsecurity 
!= USER_ADDR_NULL
) { 
4907                 if ((ciferror 
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) { 
             // Requested ALLPERMS bits with the process creation mask removed.
4913         VATTR_SET(&va
, va_mode
, (uap
->mode 
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
); 
             // uid and gid are set only when the caller passed something other
             // than the KAUTH_UID_NONE / KAUTH_GID_NONE sentinels.
4914         if (uap
->uid 
!= KAUTH_UID_NONE
) { 
4915                 VATTR_SET(&va
, va_uid
, uap
->uid
); 
4917         if (uap
->gid 
!= KAUTH_GID_NONE
) { 
4918                 VATTR_SET(&va
, va_gid
, uap
->gid
); 
4920         if (xsecdst 
!= KAUTH_FILESEC_NONE
) { 
4921                 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
); 
4924         ciferror 
= mkfifo1(vfs_context_current(), uap
->path
, &va
); 
             // The copied-in filesec is released after mkfifo1() has returned.
4926         if (xsecdst 
!= KAUTH_FILESEC_NONE
) { 
4927                 kauth_filesec_free(xsecdst
); 
// mkfifo: sets va_mode to the requested ALLPERMS bits with the process creation
// mask removed, then delegates to mkfifo1() with the current VFS context.
// NOTE(review): at least one original line between the declaration and the
// VATTR_SET is not visible in this listing.
4934 mkfifo(proc_t p
, struct mkfifo_args 
*uap
, __unused 
int32_t *retval
) 
4936         struct vnode_attr va
; 
4939         VATTR_SET(&va
, va_mode
, (uap
->mode 
& ALLPERMS
) & ~p
->p_fd
->fd_cmask
); 
4941         return mkfifo1(vfs_context_current(), uap
->path
, &va
); 
4944 extern int safe_getpath_new(struct vnode 
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
); 
4945 extern int safe_getpath(struct vnode 
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
); 
4946 extern int safe_getpath_no_firmlink(struct vnode 
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
); 
// safe_getpath_new: writes the path of dvp into path, appends '/' and leafname,
// and sets *truncated_path to 1 whenever the result could not be produced in
// full (0 otherwise).  safe_getpath() calls this with firmlink = 1 and
// safe_getpath_no_firmlink() with firmlink = 0.
// NOTE(review): gaps in the embedded line numbers mean the lines that choose
// between the vn_getpath() and vn_getpath_no_firmlink() calls, the loop opener
// and the return are not visible; presumably firmlink selects the call - confirm.
// NOTE(review): the append step is bounded by MAXPATHLEN, not by the
// caller-supplied _len.  Every caller in this listing passes MAXPATHLEN, so the
// two agree there, but a smaller _len would not be honored by lines 4960-4964.
4949 safe_getpath_new(struct vnode 
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
, int firmlink
) 
             // len starts as the buffer size and is passed by address to the
             // path lookup.
4951         int ret
, len 
= _len
; 
4953         *truncated_path 
= 0; 
4956                 ret 
= vn_getpath(dvp
, path
, &len
); 
4958                 ret 
= vn_getpath_no_firmlink(dvp
, path
, &len
); 
             // Success with room left: append the separator and the leaf name.
4960         if (ret 
== 0 && len 
< (MAXPATHLEN 
- 1)) { 
                             // The byte at len - 1 is overwritten with the separator
                             // (presumably the terminator counted in len - confirm).
4962                         path
[len 
- 1] = '/'; 
                             // strlcpy() returns the length of leafname, so len can exceed
                             // MAXPATHLEN when the copy was cut short.
4963                         len 
+= strlcpy(&path
[len
], leafname
, MAXPATHLEN 
- len
) + 1; 
4964                         if (len 
> MAXPATHLEN
) { 
4967                                 // the string got truncated! 
4968                                 *truncated_path 
= 1; 
4969                                 ptr 
= strrchr(path
, '/'); 
4971                                         *ptr 
= '\0';   // chop off the string at the last directory component 
4973                                 len 
= (int)strlen(path
) + 1; 
             // Success but no room to append: report the result as truncated.
4976         } else if (ret 
== 0) { 
4977                 *truncated_path 
= 1; 
             // Lookup failed: log unless the error is ENOSPC, then fall back to
             // the v_parent of the vnode, the mount-point name, or "/".
4978         } else if (ret 
!= 0) { 
4979                 struct vnode 
*mydvp 
= dvp
; 
4981                 if (ret 
!= ENOSPC
) { 
4982                         printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n", 
4983                             dvp
, dvp
->v_name 
? dvp
->v_name 
: "no-name", ret
); 
4985                 *truncated_path 
= 1; 
4988                         if (mydvp
->v_parent 
!= NULL
) { 
4989                                 mydvp 
= mydvp
->v_parent
; 
4990                         } else if (mydvp
->v_mount
) { 
                                     // These fallback copies are bounded by _len.
4991                                 strlcpy(path
, mydvp
->v_mount
->mnt_vfsstat
.f_mntonname
, _len
); 
4994                                 // no parent and no mount point?  only thing is to punt and say "/" changed 
4995                                 strlcpy(path
, "/", _len
); 
5000                         if (mydvp 
== NULL
) { 
5006                                 ret 
= vn_getpath(mydvp
, path
, &len
); 
5008                                 ret 
= vn_getpath_no_firmlink(mydvp
, path
, &len
); 
                     // The fallback lookup is repeated for as long as it reports ENOSPC.
5010                 } while (ret 
== ENOSPC
); 
// safe_getpath: forwards to safe_getpath_new() with firmlink = 1.
5017 safe_getpath(struct vnode 
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
) 
5019         return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 1); 
// safe_getpath_no_firmlink: forwards to safe_getpath_new() with firmlink = 0.
5023 safe_getpath_no_firmlink(struct vnode 
*dvp
, char *leafname
, char *path
, int _len
, int *truncated_path
) 
5025         return safe_getpath_new(dvp
, leafname
, path
, _len
, truncated_path
, 0); 
5029  * Make a hard file link. 
5031  * Returns:     0                       Success 
5036  *      vnode_authorize:??? 
// linkat_internal: hard-link implementation behind link() and linkat().  It
// looks up the existing object relative to fd1, looks up the new name relative
// to fd2, runs the MAC and kauth checks, calls VNOP_LINK(), and then notifies
// audit, kauth file-op listeners and fsevents.
// NOTE(review): gaps in the embedded line numbers mean many original lines
// (branch bodies, gotos, cleanup, parts of argument lists) are not visible in
// this listing; the notes below cover only the statements that are shown.
5041 linkat_internal(vfs_context_t ctx
, int fd1
, user_addr_t path
, int fd2
, 
5042     user_addr_t link
, int flag
, enum uio_seg segflg
) 
5044         vnode_t vp
, pvp
, dvp
, lvp
; 
5045         struct nameidata nd
; 
5051         int need_event
, has_listeners
, need_kpath2
; 
5052         char *target_path 
= NULL
; 
5055         vp 
= dvp 
= lvp 
= NULLVP
; 
5057         /* look up the object we are linking to */ 
             // AT_SYMLINK_FOLLOW in flag selects FOLLOW, otherwise NOFOLLOW.
5058         follow 
= (flag 
& AT_SYMLINK_FOLLOW
) ? FOLLOW 
: NOFOLLOW
; 
5059         NDINIT(&nd
, LOOKUP
, OP_LOOKUP
, AUDITVNPATH1 
| follow
, 
5062         error 
= nameiat(&nd
, fd1
); 
5071          * Normally, linking to directories is not supported. 
5072          * However, some file systems may have limited support. 
5074         if (vp
->v_type 
== VDIR
) { 
                     // Directory links require MNTK_DIR_HARDLINKS on the mount.
5075                 if (!ISSET(vp
->v_mount
->mnt_kern_flag
, MNTK_DIR_HARDLINKS
)) { 
5076                         error 
= EPERM
;   /* POSIX */ 
5080                 /* Linking to a directory requires ownership. */ 
                     // For a non-superuser the uid of the directory is fetched and
                     // compared with the uid of the caller's credential.
5081                 if (!kauth_cred_issuser(vfs_context_ucred(ctx
))) { 
5082                         struct vnode_attr dva
; 
5085                         VATTR_WANTED(&dva
, va_uid
); 
5086                         if (vnode_getattr(vp
, &dva
, ctx
) != 0 || 
5087                             !VATTR_IS_SUPPORTED(&dva
, va_uid
) || 
5088                             (dva
.va_uid 
!= kauth_cred_getuid(vfs_context_ucred(ctx
)))) { 
5095         /* lookup the target node */ 
             // The same nameidata is reused for the second lookup, switched to a
             // CREATE operation with LOCKPARENT set.
5099         nd
.ni_cnd
.cn_nameiop 
= CREATE
; 
5100         nd
.ni_cnd
.cn_flags 
= LOCKPARENT 
| AUDITVNPATH2 
| CN_NBMOUNTLOOK
; 
5102         error 
= nameiat(&nd
, fd2
); 
5110         if ((error 
= mac_vnode_check_link(ctx
, dvp
, vp
, &nd
.ni_cnd
)) != 0) { 
5115         /* or to anything that kauth doesn't want us to (eg. immutable items) */ 
5116         if ((error 
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_LINKTARGET
, ctx
)) != 0) { 
5120         /* target node must not exist */ 
5121         if (lvp 
!= NULLVP
) { 
5125         /* cannot link across mountpoints */ 
5126         if (vnode_mount(vp
) != vnode_mount(dvp
)) { 
5131         /* authorize creation of the target node */ 
5132         if ((error 
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) { 
5136         /* and finally make the link */ 
5137         error 
= VNOP_LINK(vp
, dvp
, &nd
.ni_cnd
, ctx
); 
             // The result of the MAC notification is deliberately discarded.
5143         (void)mac_vnode_notify_link(ctx
, vp
, dvp
, &nd
.ni_cnd
); 
             // Work out which consumers need the path of the new link: fsevents,
             // kauth file-op listeners, or an active audit record.
5147         need_event 
= need_fsevent(FSE_CREATE_FILE
, dvp
); 
5151         has_listeners 
= kauth_authorize_fileop_has_listeners(); 
5155         if (AUDIT_RECORD_EXISTS()) { 
5160         if (need_event 
|| has_listeners 
|| need_kpath2
) { 
5161                 char *link_to_path 
= NULL
; 
5162                 int len
, link_name_len
; 
5164                 /* build the path to the new link file */ 
5165                 GET_PATH(target_path
); 
                     // len is later passed to add_fsevent() as the string length.
5167                 len 
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, target_path
, MAXPATHLEN
, &truncated
); 
5169                 AUDIT_ARG(kpath
, target_path
, ARG_KPATH2
); 
5171                 if (has_listeners
) { 
5172                         /* build the path to file we are linking to */ 
5173                         GET_PATH(link_to_path
); 
5175                         link_name_len 
= MAXPATHLEN
; 
                             // NOTE(review): the comment below says "rename", but the
                             // operation reported is KAUTH_FILEOP_LINK.
5176                         if (vn_getpath(vp
, link_to_path
, &link_name_len
) == 0) { 
5178                                  * Call out to allow 3rd party notification of rename. 
5179                                  * Ignore result of kauth_authorize_fileop call. 
5181                                 kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_LINK
, 
5182                                     (uintptr_t)link_to_path
, 
5183                                     (uintptr_t)target_path
); 
5185                         if (link_to_path 
!= NULL
) { 
5186                                 RELEASE_PATH(link_to_path
); 
5191                         /* construct fsevent */ 
5192                         if (get_fse_info(vp
, &finfo
, ctx
) == 0) { 
5194                                         finfo
.mode 
|= FSE_TRUNCATED_PATH
; 
5197                                 // build the path to the destination of the link 
5198                                 add_fsevent(FSE_CREATE_FILE
, ctx
, 
5199                                     FSE_ARG_STRING
, len
, target_path
, 
5200                                     FSE_ARG_FINFO
, &finfo
, 
5205                         // need an iocount on pvp in this case 
5206                         if (pvp 
&& pvp 
!= dvp
) { 
5207                                 error 
= vnode_get(pvp
); 
5214                                 add_fsevent(FSE_STAT_CHANGED
, ctx
, 
5215                                     FSE_ARG_VNODE
, pvp
, FSE_ARG_DONE
); 
5217                         if (pvp 
&& pvp 
!= dvp
) { 
5225          * nameidone has to happen before we vnode_put(dvp) 
5226          * since it may need to release the fs_nodelock on the dvp 
             // target_path is released only if it was obtained above.
5229         if (target_path 
!= NULL
) { 
5230                 RELEASE_PATH(target_path
); 
// link: forwards to linkat_internal() with AT_FDCWD for both directory fds,
// AT_SYMLINK_FOLLOW as the flag, and user-space paths.
5244 link(__unused proc_t p
, struct link_args 
*uap
, __unused 
int32_t *retval
) 
5246         return linkat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
, 
5247                    AT_FDCWD
, uap
->link
, AT_SYMLINK_FOLLOW
, UIO_USERSPACE
); 
// linkat: tests uap->flag for any bit other than AT_SYMLINK_FOLLOW (the body
// of that branch is not visible in this listing), then forwards the caller's
// fds, paths and flag to linkat_internal().
5251 linkat(__unused proc_t p
, struct linkat_args 
*uap
, __unused 
int32_t *retval
) 
5253         if (uap
->flag 
& ~AT_SYMLINK_FOLLOW
) { 
5257         return linkat_internal(vfs_context_current(), uap
->fd1
, uap
->path
, 
5258                    uap
->fd2
, uap
->link
, uap
->flag
, UIO_USERSPACE
); 
5262  * Make a symbolic link. 
5264  * We could add support for ACLs here too... 
// symlinkat_internal: symbolic-link implementation behind symlink() and
// symlinkat().  path_data is the link contents, link is the name to create,
// looked up relative to fd.
// NOTE(review): gaps in the embedded line numbers mean many original lines
// (branch conditions and bodies, cleanup, the #endif of the disabled region)
// are not visible in this listing.
5268 symlinkat_internal(vfs_context_t ctx
, user_addr_t path_data
, int fd
, 
5269     user_addr_t link
, enum uio_seg segflg
) 
5271         struct vnode_attr va
; 
5274         struct nameidata nd
; 
             // Link contents coming from user space are copied into a ZV_NAMEI
             // buffer (at most MAXPATHLEN bytes); otherwise path_data is used
             // directly as a kernel pointer.
5280         if (UIO_SEG_IS_USER_SPACE(segflg
)) { 
5281                 path 
= zalloc(ZV_NAMEI
); 
5282                 error 
= copyinstr(path_data
, path
, MAXPATHLEN
, &dummy
); 
5284                 path 
= (char *)path_data
; 
5289         AUDIT_ARG(text
, path
);  /* This is the link string */ 
5291         NDINIT(&nd
, CREATE
, OP_SYMLINK
, LOCKPARENT 
| AUDITVNPATH1
, 
5294         error 
= nameiat(&nd
, fd
); 
5301         p 
= vfs_context_proc(ctx
); 
             // The new node is a VLNK whose mode is ACCESSPERMS with the process
             // creation mask removed.
5303         VATTR_SET(&va
, va_type
, VLNK
); 
5304         VATTR_SET(&va
, va_mode
, ACCESSPERMS 
& ~p
->p_fd
->fd_cmask
); 
5307         error 
= mac_vnode_check_create(ctx
, 
5308             dvp
, &nd
.ni_cnd
, &va
); 
             // Visible order: authorize the add, fill in default attributes, then
             // create the link.  The conditions guarding each step are not visible.
5321                 error 
= vnode_authorize(dvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
); 
5323         /* get default ownership, etc. */ 
5325                 error 
= vnode_authattr_new(dvp
, &va
, 0, ctx
); 
5328                 error 
= VNOP_SYMLINK(dvp
, &vp
, &nd
.ni_cnd
, &va
, path
, ctx
); 
5331         /* do fallback attribute handling */ 
5332         if (error 
== 0 && vp
) { 
5333                 error 
= vnode_setattr_fallback(vp
, &va
, ctx
); 
5337         if (error 
== 0 && vp
) { 
5338                 error 
= vnode_label(vnode_mount(vp
), dvp
, vp
, &nd
.ni_cnd
, VNODE_LABEL_CREATE
, ctx
); 
5343                 int     update_flags 
= 0; 
5345                 /*check if a new vnode was created, else try to get one*/ 
                             // The nameidata is switched to a plain LOOKUP and run again.
5347                         nd
.ni_cnd
.cn_nameiop 
= LOOKUP
; 
5349                         nd
.ni_op 
= OP_LOOKUP
; 
5351                         nd
.ni_cnd
.cn_flags 
= 0; 
5352                         error 
= nameiat(&nd
, fd
); 
5360 #if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */ 
5361                 /* call out to allow 3rd party notification of rename. 
5362                  * Ignore result of kauth_authorize_fileop call. 
5364                 if (kauth_authorize_fileop_has_listeners() && 
5366                         char *new_link_path 
= NULL
; 
5369                         /* build the path to the new link file */ 
5370                         new_link_path 
= get_pathbuff(); 
5372                         vn_getpath(dvp
, new_link_path
, &len
); 
5373                         if ((len 
+ 1 + nd
.ni_cnd
.cn_namelen 
+ 1) < MAXPATHLEN
) { 
5374                                 new_link_path
[len 
- 1] = '/'; 
5375                                 strlcpy(&new_link_path
[len
], nd
.ni_cnd
.cn_nameptr
, MAXPATHLEN 
- len
); 
5378                         kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_SYMLINK
, 
5379                             (uintptr_t)path
, (uintptr_t)new_link_path
); 
5380                         if (new_link_path 
!= NULL
) { 
5381                                 release_pathbuff(new_link_path
); 
5385                 // Make sure the name & parent pointers are hooked up 
5386                 if (vp
->v_name 
== NULL
) { 
5387                         update_flags 
|= VNODE_UPDATE_NAME
; 
5389                 if (vp
->v_parent 
== NULLVP
) { 
5390                         update_flags 
|= VNODE_UPDATE_PARENT
; 
5394                         vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
); 
5398                 add_fsevent(FSE_CREATE_FILE
, ctx
, 
5406          * nameidone has to happen before we vnode_put(dvp) 
5407          * since it may need to release the fs_nodelock on the dvp 
             // The buffer is returned to ZV_NAMEI only when it is not the
             // caller's own path_data pointer.
5416         if (path 
&& (path 
!= (char *)path_data
)) { 
5417                 zfree(ZV_NAMEI
, path
); 
// symlink: forwards to symlinkat_internal() with AT_FDCWD as the directory fd;
// uap->path is the link contents and uap->link the name to create.
5424 symlink(__unused proc_t p
, struct symlink_args 
*uap
, __unused 
int32_t *retval
) 
5426         return symlinkat_internal(vfs_context_current(), uap
->path
, AT_FDCWD
, 
5427                    uap
->link
, UIO_USERSPACE
); 
// symlinkat: forwards to symlinkat_internal() with the caller's directory fd;
// uap->path1 is the link contents and uap->path2 the name to create.
5431 symlinkat(__unused proc_t p
, struct symlinkat_args 
*uap
, 
5432     __unused 
int32_t *retval
) 
5434         return symlinkat_internal(vfs_context_current(), uap
->path1
, uap
->fd
, 
5435                    uap
->path2
, UIO_USERSPACE
); 
5439  * Delete a whiteout from the filesystem. 
5440  * No longer supported. 
// undelete: entry point for the unsupported whiteout-removal call; every
// parameter is tagged __unused.  The body is not visible in this listing.
5443 undelete(__unused proc_t p
, __unused 
struct undelete_args 
*uap
, __unused 
int32_t *retval
) 
5449  * Delete a name from the filesystem. 
// unlinkat_internal: name-removal implementation behind unlink1(), delete(),
// unlink() and unlinkat().  It looks up path_arg (relative to start_dvp or fd),
// authorizes the removal, removes the name via vn_remove() or
// vnode_removenamedstream(), and notifies kauth listeners and fsevents.
// NOTE(review): gaps in the embedded line numbers mean many original lines
// (labels, branch bodies, gotos, cleanup, parts of argument lists) are not
// visible in this listing; the notes below cover only the statements shown.
5453 unlinkat_internal(vfs_context_t ctx
, int fd
, vnode_t start_dvp
, 
5454     user_addr_t path_arg
, enum uio_seg segflg
, int unlink_flags
) 
5456         struct nameidata nd
; 
5459         struct componentname 
*cnp
; 
5461         char  *no_firmlink_path 
= NULL
; 
5463         int  len_no_firmlink_path 
= 0; 
5466         struct vnode_attr va
; 
5472         int truncated_no_firmlink_path
; 
5474         struct vnode_attr 
*vap
; 
5476         int retry_count 
= 0; 
             // Lookup flags: LOCKPARENT always, AUDITVNPATH1 unless the caller
             // passed VNODE_REMOVE_NO_AUDIT_PATH.
5479         cn_flags 
= LOCKPARENT
; 
5480         if (!(unlink_flags 
& VNODE_REMOVE_NO_AUDIT_PATH
)) { 
5481                 cn_flags 
|= AUDITVNPATH1
; 
5483         /* If a starting dvp is passed, it trumps any fd passed. */ 
5489         /* unlink or delete is allowed on rsrc forks and named streams */ 
5490         cn_flags 
|= CN_ALLOWRSRCFORK
; 
5499         truncated_no_firmlink_path 
= 0; 
5502         NDINIT(&nd
, DELETE
, OP_UNLINK
, cn_flags
, segflg
, path_arg
, ctx
); 
5504         nd
.ni_dvp 
= start_dvp
; 
5505         nd
.ni_flag 
|= NAMEI_COMPOUNDREMOVE
; 
5509         error 
= nameiat(&nd
, fd
); 
5518         /* With Carbon delete semantics, busy files cannot be deleted */ 
5519         if (unlink_flags 
& VNODE_REMOVE_NODELETEBUSY
) { 
5520                 flags 
|= VNODE_REMOVE_NODELETEBUSY
; 
5523         /* Skip any potential upcalls if told to. */ 
5524         if (unlink_flags 
& VNODE_REMOVE_SKIP_NAMESPACE_EVENT
) { 
5525                 flags 
|= VNODE_REMOVE_SKIP_NAMESPACE_EVENT
; 
5529                 batched 
= vnode_compound_remove_available(vp
); 
5531                  * The root of a mounted filesystem cannot be deleted. 
                     // The test also fires when vp and dvp are on different mounts.
5533                 if ((vp
->v_flag 
& VROOT
) || (dvp
->v_mount 
!= vp
->v_mount
)) { 
5538 #if DEVELOPMENT || DEBUG 
5540                  * XXX VSWAP: Check for entitlements or special flag here 
5541                  * so we can restrict access appropriately. 
5543 #else /* DEVELOPMENT || DEBUG */ 
5545                 if (vnode_isswap(vp
) && (ctx 
!= vfs_context_kernel())) { 
5549 #endif /* DEVELOPMENT || DEBUG */ 
                             // An ENOENT from authorization is compared against a bounded
                             // retry counter (MAX_AUTHORIZE_ENOENT_RETRIES).
5552                         error 
= vn_authorize_unlink(dvp
, vp
, cnp
, ctx
, NULL
); 
5554                                 if (error 
== ENOENT
) { 
5555                                         if (retry_count 
< MAX_AUTHORIZE_ENOENT_RETRIES
) { 
5566                 if (!vnode_compound_remove_available(dvp
)) { 
5567                         panic("No vp, but no compound remove?"); 
5572         need_event 
= need_fsevent(FSE_DELETE
, dvp
); 
5575                         if ((vp
->v_flag 
& VISHARDLINK
) == 0) { 
5576                                 /* XXX need to get these data in batched VNOP */ 
5577                                 get_fse_info(vp
, &finfo
, ctx
); 
5580                         error 
= vfs_get_notify_attributes(&va
); 
             // When anyone will be notified, the path is built twice: once with
             // safe_getpath() and once with safe_getpath_no_firmlink().
5589         has_listeners 
= kauth_authorize_fileop_has_listeners(); 
5590         if (need_event 
|| has_listeners
) { 
5594                 len_path 
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
); 
5595                 if (no_firmlink_path 
== NULL
) { 
5596                         GET_PATH(no_firmlink_path
); 
5598                 len_no_firmlink_path 
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
); 
             // A lookup that wants the resource fork removes the named stream;
             // the other visible removal call is vn_remove().
5602         if (nd
.ni_cnd
.cn_flags 
& CN_WANTSRSRCFORK
) { 
5603                 error 
= vnode_removenamedstream(dvp
, vp
, XATTR_RESOURCEFORK_NAME
, 0, ctx
); 
5607                 error 
= vn_remove(dvp
, &nd
.ni_vp
, &nd
, flags
, vap
, ctx
); 
                     // EKEEPLOOKING sends control back to continue_lookup after the
                     // consistency checks below.
5609                 if (error 
== EKEEPLOOKING
) { 
5611                                 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?"); 
5614                         if ((nd
.ni_flag 
& NAMEI_CONTLOOKUP
) == 0) { 
5615                                 panic("EKEEPLOOKING, but continue flag not set?"); 
5618                         if (vnode_isdir(vp
)) { 
5622                         goto continue_lookup
; 
5623                 } else if (error 
== ENOENT 
&& batched
) { 
5624                         if (retry_count 
< MAX_AUTHORIZE_ENOENT_RETRIES
) { 
5626                                  * For compound VNOPs, the authorization callback may 
5627                                  * return ENOENT in case of racing hardlink lookups 
5628                                  * hitting the name  cache, redrive the lookup. 
5638          * Call out to allow 3rd party notification of delete. 
5639          * Ignore result of kauth_authorize_fileop call. 
5642                 if (has_listeners
) { 
5643                         kauth_authorize_fileop(vfs_context_ucred(ctx
), 
5644                             KAUTH_FILEOP_DELETE
, 
5649                 if (vp
->v_flag 
& VISHARDLINK
) { 
5651                         // if a hardlink gets deleted we want to blow away the 
5652                         // v_parent link because the path that got us to this 
5653                         // instance of the link is no longer valid.  this will 
5654                         // force the next call to get the path to ask the file 
5655                         // system instead of just following the v_parent link. 
5657                         vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
); 
5662                         if (vp
->v_flag 
& VISHARDLINK
) { 
5663                                 get_fse_info(vp
, &finfo
, ctx
); 
5665                                 vnode_get_fse_info_from_vap(vp
, &finfo
, vap
); 
                             // NOTE(review): the event below reports no_firmlink_path, but
                             // the truncation flag tested here is truncated_path, not
                             // truncated_no_firmlink_path - confirm this is intended.
5667                         if (truncated_path
) { 
5668                                 finfo
.mode 
|= FSE_TRUNCATED_PATH
; 
5670                         add_fsevent(FSE_DELETE
, ctx
, 
5671                             FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
, 
5672                             FSE_ARG_FINFO
, &finfo
, 
             // The no-firmlink path buffer is released and the pointer cleared.
5684         if (no_firmlink_path 
!= NULL
) { 
5685                 RELEASE_PATH(no_firmlink_path
); 
5686                 no_firmlink_path 
= NULL
; 
5689         /* recycle the deleted rsrc fork vnode to force a reclaim, which 
5690          * will cause its shadow file to go away if necessary. 
5692         if (vp 
&& (vnode_isnamedstream(vp
)) && 
5693             (vp
->v_parent 
!= NULLVP
) && 
5694             vnode_isshadow(vp
)) { 
5699          * nameidone has to happen before we vnode_put(dvp) 
5700          * since it may need to release the fs_nodelock on the dvp 
// unlink1: forwards to unlinkat_internal() with AT_FDCWD as the directory fd
// and the caller's start_dvp, path and segflg.
// NOTE(review): the final line of the call's argument list is not visible in
// this listing.
5716 unlink1(vfs_context_t ctx
, vnode_t start_dvp
, user_addr_t path_arg
, 
5717     enum uio_seg segflg
, int unlink_flags
) 
5719         return unlinkat_internal(ctx
, AT_FDCWD
, start_dvp
, path_arg
, segflg
, 
5724  * Delete a name from the filesystem using Carbon semantics. 
// delete: forwards to unlinkat_internal() with AT_FDCWD, no starting vnode,
// a user-space path, and VNODE_REMOVE_NODELETEBUSY as the removal flag.
5727 delete(__unused proc_t p
, struct delete_args 
*uap
, __unused 
int32_t *retval
) 
5729         return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
, 
5730                    uap
->path
, UIO_USERSPACE
, VNODE_REMOVE_NODELETEBUSY
); 
5734  * Delete a name from the filesystem using POSIX semantics. 
// unlink: forwards to unlinkat_internal() with AT_FDCWD, no starting vnode,
// a user-space path, and no removal flags (0).
5737 unlink(__unused proc_t p
, struct unlink_args 
*uap
, __unused 
int32_t *retval
) 
5739         return unlinkat_internal(vfs_context_current(), AT_FDCWD
, NULLVP
, 
5740                    uap
->path
, UIO_USERSPACE
, 0); 
// unlinkat: tests uap->flag for bits outside AT_REMOVEDIR and
// AT_REMOVEDIR_DATALESS (the body of that branch is not visible in this
// listing).  With either of those bits set the request goes to
// rmdirat_internal(); the other visible return forwards to unlinkat_internal().
5744 unlinkat(__unused proc_t p
, struct unlinkat_args 
*uap
, __unused 
int32_t *retval
) 
5746         if (uap
->flag 
& ~(AT_REMOVEDIR 
| AT_REMOVEDIR_DATALESS
)) { 
5750         if (uap
->flag 
& (AT_REMOVEDIR 
| AT_REMOVEDIR_DATALESS
)) { 
5751                 int unlink_flags 
= 0; 
                     // AT_REMOVEDIR_DATALESS is translated to VNODE_REMOVE_DATALESS_DIR.
5753                 if (uap
->flag 
& AT_REMOVEDIR_DATALESS
) { 
5754                         unlink_flags 
|= VNODE_REMOVE_DATALESS_DIR
; 
5756                 return rmdirat_internal(vfs_context_current(), uap
->fd
, 
5757                            uap
->path
, UIO_USERSPACE
, unlink_flags
); 
5759                 return unlinkat_internal(vfs_context_current(), uap
->fd
, 
5760                            NULLVP
, uap
->path
, UIO_USERSPACE
, 0); 
5765  * Reposition read/write file offset. 
5768 lseek(proc_t p
, struct lseek_args 
*uap
, off_t 
*retval
) 
5770         struct fileproc 
*fp
; 
5772         struct vfs_context 
*ctx
; 
5773         off_t offset 
= uap
->offset
, file_size
; 
5776         if ((error 
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) { 
5777                 if (error 
== ENOTSUP
) { 
5782         if (vnode_isfifo(vp
)) { 
5788         ctx 
= vfs_context_current(); 
5790         if (uap
->whence 
== L_INCR 
&& uap
->offset 
== 0) { 
5791                 error 
= mac_file_check_get_offset(vfs_context_ucred(ctx
), 
5794                 error 
= mac_file_check_change_offset(vfs_context_ucred(ctx
), 
5802         if ((error 
= vnode_getwithref(vp
))) { 
5807         switch (uap
->whence
) { 
5809                 offset 
+= fp
->fp_glob
->fg_offset
; 
5812                 if ((error 
= vnode_size(vp
, &file_size
, ctx
)) != 0) { 
5815                 offset 
+= file_size
; 
5820                 error 
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKHOLE
, (caddr_t
)&offset
, 0, ctx
); 
5823                 error 
= VNOP_IOCTL(vp
, FSIOC_FIOSEEKDATA
, (caddr_t
)&offset
, 0, ctx
); 
5829                 if (uap
->offset 
> 0 && offset 
< 0) { 
5830                         /* Incremented/relative move past max size */ 
5834                          * Allow negative offsets on character devices, per 
5835                          * POSIX 1003.1-2001.  Most likely for writing disk 
5838                         if (offset 
< 0 && vp
->v_type 
!= VCHR
) { 
5839                                 /* Decremented/relative move before start */ 
5843                                 fp
->fp_glob
->fg_offset 
= offset
; 
5844                                 *retval 
= fp
->fp_glob
->fg_offset
; 
5850          * An lseek can affect whether data is "available to read."  Use 
5851          * hint of NOTE_NONE so no EVFILT_VNODE events fire 
5853         post_event_if_success(vp
, error
, NOTE_NONE
); 
5854         (void)vnode_put(vp
); 
5861  * Check access permissions. 
5863  * Returns:     0                       Success 
5864  *              vnode_authorize:??? 
5867 access1(vnode_t vp
, vnode_t dvp
, int uflags
, vfs_context_t ctx
) 
5869         kauth_action_t action
; 
5873          * If just the regular access bits, convert them to something 
5874          * that vnode_authorize will understand. 
5876         if (!(uflags 
& _ACCESS_EXTENDED_MASK
)) { 
5878                 if (uflags 
& R_OK
) { 
5879                         action 
|= KAUTH_VNODE_READ_DATA
;        /* aka KAUTH_VNODE_LIST_DIRECTORY */ 
5881                 if (uflags 
& W_OK
) { 
5882                         if (vnode_isdir(vp
)) { 
5883                                 action 
|= KAUTH_VNODE_ADD_FILE 
| 
5884                                     KAUTH_VNODE_ADD_SUBDIRECTORY
; 
5885                                 /* might want delete rights here too */ 
5887                                 action 
|= KAUTH_VNODE_WRITE_DATA
; 
5890                 if (uflags 
& X_OK
) { 
5891                         if (vnode_isdir(vp
)) { 
5892                                 action 
|= KAUTH_VNODE_SEARCH
; 
5894                                 action 
|= KAUTH_VNODE_EXECUTE
; 
5898                 /* take advantage of definition of uflags */ 
5899                 action 
= uflags 
>> 8; 
5903         error 
= mac_vnode_check_access(ctx
, vp
, uflags
); 
5909         /* action == 0 means only check for existence */ 
5911                 error 
= vnode_authorize(vp
, dvp
, action 
| KAUTH_VNODE_ACCESS
, ctx
); 
5922  * access_extended: Check access permissions in bulk. 
5924  * Description: uap->entries            Pointer to an array of accessx 
5925  *                                      descriptor structs, plus one or 
5926  *                                      more NULL terminated strings (see 
5927  *                                      "Notes" section below). 
5928  *              uap->size               Size of the area pointed to by 
5930  *              uap->results            Pointer to the results array. 
5932  * Returns:     0                       Success 
5933  *              ENOMEM                  Insufficient memory 
5934  *              EINVAL                  Invalid arguments 
5935  *              namei:EFAULT            Bad address 
5936  *              namei:ENAMETOOLONG      Filename too long 
5937  *              namei:ENOENT            No such file or directory 
5938  *              namei:ELOOP             Too many levels of symbolic links 
5939  *              namei:EBADF             Bad file descriptor 
5940  *              namei:ENOTDIR           Not a directory 
5945  *              uap->results            Array contents modified 
5947  * Notes:       The uap->entries are structured as an arbitrary length array 
5948  *              of accessx descriptors, followed by one or more NULL terminated 
5951  *                      struct accessx_descriptor[0] 
5953  *                      struct accessx_descriptor[n] 
5954  *                      char name_data[0]; 
5956  *              We determine the entry count by walking the buffer containing 
5957  *              the uap->entries argument descriptor.  For each descriptor we 
5958  *              see, the valid values for the offset ad_name_offset will be 
5959  *              in the byte range: 
5961  *                      [ uap->entries + sizeof(struct accessx_descriptor) ] 
5963  *                              [ uap->entries + uap->size - 2 ] 
5965  *              since we must have at least one string, and the string must 
5966  *              be at least one character plus the NULL terminator in length. 
5968  * XXX:         Need to support the check-as uid argument 
5971 access_extended(__unused proc_t p
, struct access_extended_args 
*uap
, __unused 
int32_t *retval
) 
5973         struct accessx_descriptor 
*input 
= NULL
; 
5974         errno_t 
*result 
= NULL
; 
5977         size_t desc_max
, desc_actual
; 
5979         struct vfs_context context
; 
5980         struct nameidata nd
; 
5984 #define ACCESSX_MAX_DESCR_ON_STACK 10 
5985         struct accessx_descriptor stack_input
[ACCESSX_MAX_DESCR_ON_STACK
]; 
5987         context
.vc_ucred 
= NULL
; 
5990          * Validate parameters; if valid, copy the descriptor array and string 
5991          * arguments into local memory.  Before proceeding, the following 
5992          * conditions must have been met: 
5994          * o    The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE 
5995          * o    There must be sufficient room in the request for at least one 
5996          *      descriptor and a one byte NUL terminated string. 
5997          * o    The allocation of local storage must not fail. 
5999         if (uap
->size 
> ACCESSX_MAX_TABLESIZE
) { 
6002         if (uap
->size 
< (sizeof(struct accessx_descriptor
) + 2)) { 
6005         if (uap
->size 
<= sizeof(stack_input
)) { 
6006                 input 
= stack_input
; 
6008                 input 
= kheap_alloc(KHEAP_DATA_BUFFERS
, uap
->size
, Z_WAITOK
); 
6009                 if (input 
== NULL
) { 
6014         error 
= copyin(uap
->entries
, input
, uap
->size
); 
6019         AUDIT_ARG(opaque
, input
, uap
->size
); 
6022          * Force NUL termination of the copyin buffer to avoid namei() running 
6023          * off the end.  If the caller passes us bogus data, they may get a 
6026         ((char *)input
)[uap
->size 
- 1] = 0; 
6029          * Access is defined as checking against the process' real identity, 
6030          * even if operations are checking the effective identity.  This 
6031          * requires that we use a local vfs context. 
6033         context
.vc_ucred 
= kauth_cred_copy_real(kauth_cred_get()); 
6034         context
.vc_thread 
= current_thread(); 
6037          * Find out how many entries we have, so we can allocate the result 
6038          * array by walking the list and adjusting the count downward by the 
6039          * earliest string offset we see. 
6041         desc_max 
= (uap
->size 
- 2) / sizeof(struct accessx_descriptor
); 
6042         desc_actual 
= desc_max
; 
6043         for (i 
= 0; i 
< desc_actual
; i
++) { 
6045                  * Take the offset to the name string for this entry and 
6046                  * convert to an input array index, which would be one off 
6047                  * the end of the array if this entry was the lowest-addressed 
6050                 j 
= input
[i
].ad_name_offset 
/ sizeof(struct accessx_descriptor
); 
6053                  * An offset greater than the max allowable offset is an error. 
6054                  * It is also an error for any valid entry to point 
6055                  * to a location prior to the end of the current entry, if 
6056                  * it's not a reference to the string of the previous entry. 
6058                 if (j 
> desc_max 
|| (j 
!= 0 && j 
<= i
)) { 
6063                 /* Also do not let ad_name_offset point to something beyond the size of the input */ 
6064                 if (input
[i
].ad_name_offset 
>= uap
->size
) { 
6070                  * An offset of 0 means use the previous descriptor's offset; 
6071                  * this is used to chain multiple requests for the same file 
6072                  * to avoid multiple lookups. 
6075                         /* This is not valid for the first entry */ 
6084                  * If the offset of the string for this descriptor is before 
6085                  * what we believe is the current actual last descriptor, 
6086                  * then we need to adjust our estimate downward; this permits 
6087                  * the string table following the last descriptor to be out 
6088                  * of order relative to the descriptor list. 
6090                 if (j 
< desc_actual
) { 
6096          * We limit the actual number of descriptors we are willing to process 
6097          * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being 
6098          * requested does not exceed this limit, 
6100         if (desc_actual 
> ACCESSX_MAX_DESCRIPTORS
) { 
6104         result 
= kheap_alloc(KHEAP_DATA_BUFFERS
, desc_actual 
* sizeof(errno_t
), 
6106         if (result 
== NULL
) { 
6112          * Do the work by iterating over the descriptor entries we know to 
6113          * at least appear to contain valid data. 
6116         for (i 
= 0; i 
< desc_actual
; i
++) { 
6118                  * If the ad_name_offset is 0, then we use the previous 
6119                  * results to make the check; otherwise, we are looking up 
6122                 if (input
[i
].ad_name_offset 
!= 0) { 
6123                         /* discard old vnodes */ 
6134                          * Scan forward in the descriptor list to see if we 
6135                          * need the parent vnode.  We will need it if we are 
6136                          * deleting, since we must have rights  to remove 
6137                          * entries in the parent directory, as well as the 
6138                          * rights to delete the object itself. 
6140                         wantdelete 
= input
[i
].ad_flags 
& _DELETE_OK
; 
6141                         for (j 
= i 
+ 1; (j 
< desc_actual
) && (input
[j
].ad_name_offset 
== 0); j
++) { 
6142                                 if (input
[j
].ad_flags 
& _DELETE_OK
) { 
6147                         niopts 
= FOLLOW 
| AUDITVNPATH1
; 
6149                         /* need parent for vnode_authorize for deletion test */ 
6151                                 niopts 
|= WANTPARENT
; 
6155                         NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, UIO_SYSSPACE
, 
6156                             CAST_USER_ADDR_T(((const char *)input
) + input
[i
].ad_name_offset
), 
6169                  * Handle lookup errors. 
6179                         /* run this access check */ 
6180                         result
[i
] = access1(vp
, dvp
, input
[i
].ad_flags
, &context
); 
6183                         /* fatal lookup error */ 
6189         AUDIT_ARG(data
, result
, sizeof(errno_t
), desc_actual
); 
6191         /* copy out results */ 
6192         error 
= copyout(result
, uap
->results
, desc_actual 
* sizeof(errno_t
)); 
6195         if (input 
&& input 
!= stack_input
) { 
6196                 kheap_free(KHEAP_DATA_BUFFERS
, input
, uap
->size
); 
6199                 kheap_free(KHEAP_DATA_BUFFERS
, result
, desc_actual 
* sizeof(errno_t
)); 
6207         if (IS_VALID_CRED(context
.vc_ucred
)) { 
6208                 kauth_cred_unref(&context
.vc_ucred
); 
6215  * Returns:     0                       Success 
6216  *              namei:EFAULT            Bad address 
6217  *              namei:ENAMETOOLONG      Filename too long 
6218  *              namei:ENOENT            No such file or directory 
6219  *              namei:ELOOP             Too many levels of symbolic links 
6220  *              namei:EBADF             Bad file descriptor 
6221  *              namei:ENOTDIR           Not a directory 
6226 faccessat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, int amode
, 
6227     int flag
, enum uio_seg segflg
) 
6230         struct nameidata nd
; 
6232         struct vfs_context context
; 
6234         int is_namedstream 
= 0; 
6238          * Unless the AT_EACCESS option is used, Access is defined as checking 
6239          * against the process' real identity, even if operations are checking 
6240          * the effective identity.  So we need to tweak the credential 
6241          * in the context for that case. 
6243         if (!(flag 
& AT_EACCESS
)) { 
6244                 context
.vc_ucred 
= kauth_cred_copy_real(kauth_cred_get()); 
6246                 context
.vc_ucred 
= ctx
->vc_ucred
; 
6248         context
.vc_thread 
= ctx
->vc_thread
; 
6251         niopts 
= (flag 
& AT_SYMLINK_NOFOLLOW 
? NOFOLLOW 
: FOLLOW
) | AUDITVNPATH1
; 
6252         /* need parent for vnode_authorize for deletion test */ 
6253         if (amode 
& _DELETE_OK
) { 
6254                 niopts 
|= WANTPARENT
; 
6256         NDINIT(&nd
, LOOKUP
, OP_ACCESS
, niopts
, segflg
, 
6260         /* access(F_OK) calls are allowed for resource forks. */ 
6261         if (amode 
== F_OK
) { 
6262                 nd
.ni_cnd
.cn_flags 
|= CN_ALLOWRSRCFORK
; 
6265         error 
= nameiat(&nd
, fd
); 
6271         /* Grab reference on the shadow stream file vnode to 
6272          * force an inactive on release which will mark it 
6275         if (vnode_isnamedstream(nd
.ni_vp
) && 
6276             (nd
.ni_vp
->v_parent 
!= NULLVP
) && 
6277             vnode_isshadow(nd
.ni_vp
)) { 
6279                 vnode_ref(nd
.ni_vp
); 
6283         error 
= access1(nd
.ni_vp
, nd
.ni_dvp
, amode
, &context
); 
6286         if (is_namedstream
) { 
6287                 vnode_rele(nd
.ni_vp
); 
6291         vnode_put(nd
.ni_vp
); 
6292         if (amode 
& _DELETE_OK
) { 
6293                 vnode_put(nd
.ni_dvp
); 
6298         if (!(flag 
& AT_EACCESS
)) { 
6299                 kauth_cred_unref(&context
.vc_ucred
); 
6305 access(__unused proc_t p
, struct access_args 
*uap
, __unused 
int32_t *retval
) 
6307         return faccessat_internal(vfs_context_current(), AT_FDCWD
, 
6308                    uap
->path
, uap
->flags
, 0, UIO_USERSPACE
); 
6312 faccessat(__unused proc_t p
, struct faccessat_args 
*uap
, 
6313     __unused 
int32_t *retval
) 
6315         if (uap
->flag 
& ~(AT_EACCESS 
| AT_SYMLINK_NOFOLLOW
)) { 
6319         return faccessat_internal(vfs_context_current(), uap
->fd
, 
6320                    uap
->path
, uap
->amode
, uap
->flag
, UIO_USERSPACE
); 
6324  * Returns:     0                       Success 
6331 fstatat_internal(vfs_context_t ctx
, user_addr_t path
, user_addr_t ub
, 
6332     user_addr_t xsecurity
, user_addr_t xsecurity_size
, int isstat64
, 
6333     enum uio_seg segflg
, int fd
, int flag
) 
6335         struct nameidata nd
; 
6342                 struct user64_stat user64_sb
; 
6343                 struct user32_stat user32_sb
; 
6344                 struct user64_stat64 user64_sb64
; 
6345                 struct user32_stat64 user32_sb64
; 
6349         kauth_filesec_t fsec
; 
6350         size_t xsecurity_bufsize
; 
6352         struct fileproc 
*fp 
= NULL
; 
6353         int needsrealdev 
= 0; 
6355         follow 
= (flag 
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW 
: FOLLOW
; 
6356         NDINIT(&nd
, LOOKUP
, OP_GETATTR
, follow 
| AUDITVNPATH1
, 
6360         int is_namedstream 
= 0; 
6361         /* stat calls are allowed for resource forks. */ 
6362         nd
.ni_cnd
.cn_flags 
|= CN_ALLOWRSRCFORK
; 
6365         if (flag 
& AT_FDONLY
) { 
6368                 error 
= fp_getfvp(vfs_context_proc(ctx
), fd
, &fp
, &fvp
); 
6372                 if ((error 
= vnode_getwithref(fvp
))) { 
6378                 error 
= nameiat(&nd
, fd
); 
6383         fsec 
= KAUTH_FILESEC_NONE
; 
6385         statptr 
= (void *)&source
; 
6388         /* Grab reference on the shadow stream file vnode to 
6389          * force an inactive on release which will mark it 
6392         if (vnode_isnamedstream(nd
.ni_vp
) && 
6393             (nd
.ni_vp
->v_parent 
!= NULLVP
) && 
6394             vnode_isshadow(nd
.ni_vp
)) { 
6396                 vnode_ref(nd
.ni_vp
); 
6400         needsrealdev 
= flag 
& AT_REALDEV 
? 1 : 0; 
6401         if (fp 
&& (xsecurity 
== USER_ADDR_NULL
)) { 
6403                  * If the caller has the file open, and is not 
6404                  * requesting extended security information, we are 
6405                  * going to let them get the basic stat information. 
6407                 error 
= vn_stat_noauth(nd
.ni_vp
, statptr
, NULL
, isstat64
, needsrealdev
, ctx
, 
6408                     fp
->fp_glob
->fg_cred
); 
6410                 error 
= vn_stat(nd
.ni_vp
, statptr
, (xsecurity 
!= USER_ADDR_NULL 
? &fsec 
: NULL
), 
6411                     isstat64
, needsrealdev
, ctx
); 
6415         if (is_namedstream
) { 
6416                 vnode_rele(nd
.ni_vp
); 
6419         vnode_put(nd
.ni_vp
); 
6429         /* Zap spare fields */ 
6430         if (isstat64 
!= 0) { 
6431                 source
.sb64
.st_lspare 
= 0; 
6432                 source
.sb64
.st_qspare
[0] = 0LL; 
6433                 source
.sb64
.st_qspare
[1] = 0LL; 
6434                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) { 
6435                         munge_user64_stat64(&source
.sb64
, &dest
.user64_sb64
); 
6436                         my_size 
= sizeof(dest
.user64_sb64
); 
6437                         sbp 
= (caddr_t
)&dest
.user64_sb64
; 
6439                         munge_user32_stat64(&source
.sb64
, &dest
.user32_sb64
); 
6440                         my_size 
= sizeof(dest
.user32_sb64
); 
6441                         sbp 
= (caddr_t
)&dest
.user32_sb64
; 
6444                  * Check if we raced (post lookup) against the last unlink of a file. 
6446                 if ((source
.sb64
.st_nlink 
== 0) && S_ISREG(source
.sb64
.st_mode
)) { 
6447                         source
.sb64
.st_nlink 
= 1; 
6450                 source
.sb
.st_lspare 
= 0; 
6451                 source
.sb
.st_qspare
[0] = 0LL; 
6452                 source
.sb
.st_qspare
[1] = 0LL; 
6453                 if (IS_64BIT_PROCESS(vfs_context_proc(ctx
))) { 
6454                         munge_user64_stat(&source
.sb
, &dest
.user64_sb
); 
6455                         my_size 
= sizeof(dest
.user64_sb
); 
6456                         sbp 
= (caddr_t
)&dest
.user64_sb
; 
6458                         munge_user32_stat(&source
.sb
, &dest
.user32_sb
); 
6459                         my_size 
= sizeof(dest
.user32_sb
); 
6460                         sbp 
= (caddr_t
)&dest
.user32_sb
; 
6464                  * Check if we raced (post lookup) against the last unlink of a file. 
6466                 if ((source
.sb
.st_nlink 
== 0) && S_ISREG(source
.sb
.st_mode
)) { 
6467                         source
.sb
.st_nlink 
= 1; 
6470         if ((error 
= copyout(sbp
, ub
, my_size
)) != 0) { 
6474         /* caller wants extended security information? */ 
6475         if (xsecurity 
!= USER_ADDR_NULL
) { 
6476                 /* did we get any? */ 
6477                 if (fsec 
== KAUTH_FILESEC_NONE
) { 
6478                         if (susize(xsecurity_size
, 0) != 0) { 
6483                         /* find the user buffer size */ 
6484                         xsecurity_bufsize 
= fusize(xsecurity_size
); 
6486                         /* copy out the actual data size */ 
6487                         if (susize(xsecurity_size
, KAUTH_FILESEC_COPYSIZE(fsec
)) != 0) { 
6492                         /* if the caller supplied enough room, copy out to it */ 
6493                         if (xsecurity_bufsize 
>= KAUTH_FILESEC_COPYSIZE(fsec
)) { 
6494                                 error 
= copyout(fsec
, xsecurity
, KAUTH_FILESEC_COPYSIZE(fsec
)); 
6499         if (fsec 
!= KAUTH_FILESEC_NONE
) { 
6500                 kauth_filesec_free(fsec
); 
6506  * stat_extended: Get file status; with extended security (ACL). 
6508  * Parameters:    p                       (ignored) 
6509  *                uap                     User argument descriptor (see below) 
6512  * Indirect:      uap->path               Path of file to get status from 
6513  *                uap->ub                 User buffer (holds file status info) 
6514  *                uap->xsecurity          ACL to get (extended security) 
6515  *                uap->xsecurity_size     Size of ACL 
6517  * Returns:        0                      Success 
6522 stat_extended(__unused proc_t p
, struct stat_extended_args 
*uap
, 
6523     __unused 
int32_t *retval
) 
6525         return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
, 
6526                    uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
, 
6531  * Returns:     0                       Success 
6532  *      fstatat_internal:???            [see fstatat_internal() in this file] 
6535 stat(__unused proc_t p
, struct stat_args 
*uap
, __unused 
int32_t *retval
) 
6537         return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
, 
6538                    0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, 0); 
6542 stat64(__unused proc_t p
, struct stat64_args 
*uap
, __unused 
int32_t *retval
) 
6544         return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
, 
6545                    0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, 0); 
6549  * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL). 
6551  * Parameters:    p                       (ignored) 
6552  *                uap                     User argument descriptor (see below) 
6555  * Indirect:      uap->path               Path of file to get status from 
6556  *                uap->ub                 User buffer (holds file status info) 
6557  *                uap->xsecurity          ACL to get (extended security) 
6558  *                uap->xsecurity_size     Size of ACL 
6560  * Returns:        0                      Success 
6565 stat64_extended(__unused proc_t p
, struct stat64_extended_args 
*uap
, __unused 
int32_t *retval
) 
6567         return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
, 
6568                    uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
, 
6573  * lstat_extended: Get file status; does not follow links; with extended security (ACL). 
6575  * Parameters:    p                       (ignored) 
6576  *                uap                     User argument descriptor (see below) 
6579  * Indirect:      uap->path               Path of file to get status from 
6580  *                uap->ub                 User buffer (holds file status info) 
6581  *                uap->xsecurity          ACL to get (extended security) 
6582  *                uap->xsecurity_size     Size of ACL 
6584  * Returns:        0                      Success 
6589 lstat_extended(__unused proc_t p
, struct lstat_extended_args 
*uap
, __unused 
int32_t *retval
) 
6591         return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
, 
6592                    uap
->xsecurity
, uap
->xsecurity_size
, 0, UIO_USERSPACE
, AT_FDCWD
, 
6593                    AT_SYMLINK_NOFOLLOW
); 
6597  * Get file status; this version does not follow links. 
6600 lstat(__unused proc_t p
, struct lstat_args 
*uap
, __unused 
int32_t *retval
) 
6602         return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
, 
6603                    0, 0, 0, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
); 
6607 lstat64(__unused proc_t p
, struct lstat64_args 
*uap
, __unused 
int32_t *retval
) 
6609         return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
, 
6610                    0, 0, 1, UIO_USERSPACE
, AT_FDCWD
, AT_SYMLINK_NOFOLLOW
); 
6614  * lstat64_extended: Get file status; can handle large inode numbers; does not 
6615  * follow links; with extended security (ACL). 
6617  * Parameters:    p                       (ignored) 
6618  *                uap                     User argument descriptor (see below) 
6621  * Indirect:      uap->path               Path of file to get status from 
6622  *                uap->ub                 User buffer (holds file status info) 
6623  *                uap->xsecurity          ACL to get (extended security) 
6624  *                uap->xsecurity_size     Size of ACL 
6626  * Returns:        0                      Success 
6631 lstat64_extended(__unused proc_t p
, struct lstat64_extended_args 
*uap
, __unused 
int32_t *retval
) 
6633         return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
, 
6634                    uap
->xsecurity
, uap
->xsecurity_size
, 1, UIO_USERSPACE
, AT_FDCWD
, 
6635                    AT_SYMLINK_NOFOLLOW
); 
6639 fstatat(__unused proc_t p
, struct fstatat_args 
*uap
, __unused 
int32_t *retval
) 
6641         if (uap
->flag 
& ~(AT_SYMLINK_NOFOLLOW 
| AT_REALDEV 
| AT_FDONLY
)) { 
6645         return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
, 
6646                    0, 0, 0, UIO_USERSPACE
, uap
->fd
, uap
->flag
); 
6650 fstatat64(__unused proc_t p
, struct fstatat64_args 
*uap
, 
6651     __unused 
int32_t *retval
) 
6653         if (uap
->flag 
& ~(AT_SYMLINK_NOFOLLOW 
| AT_REALDEV 
| AT_FDONLY
)) { 
6657         return fstatat_internal(vfs_context_current(), uap
->path
, uap
->ub
, 
6658                    0, 0, 1, UIO_USERSPACE
, uap
->fd
, uap
->flag
); 
6662  * Get configurable pathname variables. 
6664  * Returns:     0                       Success 
6668  * Notes:       Global implementation  constants are intended to be 
6669  *              implemented in this function directly; all other constants 
6670  *              are per-FS implementation, and therefore must be handled in 
6671  *              each respective FS, instead. 
6673  * XXX We implement some things globally right now that should actually be 
6674  * XXX per-FS; we will need to deal with this at some point. 
6678 pathconf(__unused proc_t p
, struct pathconf_args 
*uap
, int32_t *retval
) 
6681         struct nameidata nd
; 
6682         vfs_context_t ctx 
= vfs_context_current(); 
6684         NDINIT(&nd
, LOOKUP
, OP_PATHCONF
, FOLLOW 
| AUDITVNPATH1
, 
6685             UIO_USERSPACE
, uap
->path
, ctx
); 
6691         error 
= vn_pathconf(nd
.ni_vp
, uap
->name
, retval
, ctx
); 
6693         vnode_put(nd
.ni_vp
); 
6699  * Return target name of a symbolic link. 
6703 readlinkat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, 
6704     enum uio_seg seg
, user_addr_t buf
, size_t bufsize
, enum uio_seg bufseg
, 
6710         struct nameidata nd
; 
6711         char uio_buf
[UIO_SIZEOF(1)]; 
6713         if (bufsize 
> INT32_MAX
) { 
6717         NDINIT(&nd
, LOOKUP
, OP_READLINK
, NOFOLLOW 
| AUDITVNPATH1
, 
6720         error 
= nameiat(&nd
, fd
); 
6728         auio 
= uio_createwithbuffer(1, 0, bufseg
, UIO_READ
, 
6729             &uio_buf
[0], sizeof(uio_buf
)); 
6730         uio_addiov(auio
, buf
, bufsize
); 
6731         if (vp
->v_type 
!= VLNK
) { 
6735                 error 
= mac_vnode_check_readlink(ctx
, vp
); 
6738                         error 
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, 
6742                         error 
= VNOP_READLINK(vp
, auio
, ctx
); 
6747         *retval 
= (int)(bufsize 
- uio_resid(auio
)); 
6752 readlink(proc_t p
, struct readlink_args 
*uap
, int32_t *retval
) 
6754         enum uio_seg procseg
; 
6756         procseg 
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
6757         return readlinkat_internal(vfs_context_current(), AT_FDCWD
, 
6758                    CAST_USER_ADDR_T(uap
->path
), procseg
, CAST_USER_ADDR_T(uap
->buf
), 
6759                    uap
->count
, procseg
, retval
); 
6763 readlinkat(proc_t p
, struct readlinkat_args 
*uap
, int32_t *retval
) 
6765         enum uio_seg procseg
; 
6767         procseg 
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
6768         return readlinkat_internal(vfs_context_current(), uap
->fd
, uap
->path
, 
6769                    procseg
, uap
->buf
, uap
->bufsize
, procseg
, retval
); 
6773  * Change file flags, the deep inner layer. 
6776 chflags0(vnode_t vp
, struct vnode_attr 
*va
, 
6777     int (*setattr
)(vnode_t
, void *, vfs_context_t
), 
6778     void *arg
, vfs_context_t ctx
) 
6780         kauth_action_t action 
= 0; 
6784         error 
= mac_vnode_check_setflags(ctx
, vp
, va
->va_flags
); 
6790         /* request authorisation, disregard immutability */ 
6791         if ((error 
= vnode_authattr(vp
, va
, &action
, ctx
)) != 0) { 
6795          * Request that the auth layer disregard those file flags it's allowed to when 
6796          * authorizing this operation; we need to do this in order to be able to 
6797          * clear immutable flags. 
6799         if (action 
&& ((error 
= vnode_authorize(vp
, NULL
, action 
| KAUTH_VNODE_NOIMMUTABLE
, ctx
)) != 0)) { 
6802         error 
= (*setattr
)(vp
, arg
, ctx
); 
6806                 mac_vnode_notify_setflags(ctx
, vp
, va
->va_flags
); 
6815  * Change file flags. 
6817  * NOTE: this will vnode_put() `vp' 
6820 chflags1(vnode_t vp
, int flags
, vfs_context_t ctx
) 
6822         struct vnode_attr va
; 
6826         VATTR_SET(&va
, va_flags
, flags
); 
6828         error 
= chflags0(vp
, &va
, (void *)vnode_setattr
, &va
, ctx
); 
6831         if ((error 
== 0) && !VATTR_IS_SUPPORTED(&va
, va_flags
)) { 
6839  * Change flags of a file given a path name. 
6843 chflags(__unused proc_t p
, struct chflags_args 
*uap
, __unused 
int32_t *retval
) 
6846         vfs_context_t ctx 
= vfs_context_current(); 
6848         struct nameidata nd
; 
6850         AUDIT_ARG(fflags
, uap
->flags
); 
6851         NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW 
| AUDITVNPATH1
, 
6852             UIO_USERSPACE
, uap
->path
, ctx
); 
6860         /* we don't vnode_put() here because chflags1 does internally */ 
6861         error 
= chflags1(vp
, uap
->flags
, ctx
); 
6867  * Change flags of a file given a file descriptor. 
6871 fchflags(__unused proc_t p
, struct fchflags_args 
*uap
, __unused 
int32_t *retval
) 
6876         AUDIT_ARG(fd
, uap
->fd
); 
6877         AUDIT_ARG(fflags
, uap
->flags
); 
6878         if ((error 
= file_vnode(uap
->fd
, &vp
))) { 
6882         if ((error 
= vnode_getwithref(vp
))) { 
6887         AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
); 
6889         /* we don't vnode_put() here because chflags1 does internally */ 
6890         error 
= chflags1(vp
, uap
->flags
, vfs_context_current()); 
6897  * Change security information on a filesystem object. 
6899  * Returns:     0                       Success 
6900  *              EPERM                   Operation not permitted 
6901  *              vnode_authattr:???      [anything vnode_authattr can return] 
6902  *              vnode_authorize:???     [anything vnode_authorize can return] 
6903  *              vnode_setattr:???       [anything vnode_setattr can return] 
6905  * Notes:       If vnode_authattr or vnode_authorize return EACCES, it will be 
6906  *              translated to EPERM before being returned. 
6909 chmod_vnode(vfs_context_t ctx
, vnode_t vp
, struct vnode_attr 
*vap
) 
6911         kauth_action_t action
; 
6914         AUDIT_ARG(mode
, vap
->va_mode
); 
6915         /* XXX audit new args */ 
6918         /* chmod calls are not allowed for resource forks. */ 
6919         if (vp
->v_flag 
& VISNAMEDSTREAM
) { 
6925         if (VATTR_IS_ACTIVE(vap
, va_mode
) && 
6926             (error 
= mac_vnode_check_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
)) != 0) { 
6930         if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) { 
6931                 if ((error 
= mac_vnode_check_setowner(ctx
, vp
, 
6932                     VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid 
: -1, 
6933                     VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid 
: -1))) { 
6938         if (VATTR_IS_ACTIVE(vap
, va_acl
) && 
6939             (error 
= mac_vnode_check_setacl(ctx
, vp
, vap
->va_acl
))) { 
6944         /* make sure that the caller is allowed to set this security information */ 
6945         if (((error 
= vnode_authattr(vp
, vap
, &action
, ctx
)) != 0) || 
6946             ((error 
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) { 
6947                 if (error 
== EACCES
) { 
6953         if ((error 
= vnode_setattr(vp
, vap
, ctx
)) != 0) { 
6958         if (VATTR_IS_ACTIVE(vap
, va_mode
)) { 
6959                 mac_vnode_notify_setmode(ctx
, vp
, (mode_t
)vap
->va_mode
); 
6962         if (VATTR_IS_ACTIVE(vap
, va_uid
) || VATTR_IS_ACTIVE(vap
, va_gid
)) { 
6963                 mac_vnode_notify_setowner(ctx
, vp
, 
6964                     VATTR_IS_ACTIVE(vap
, va_uid
) ? vap
->va_uid 
: -1, 
6965                     VATTR_IS_ACTIVE(vap
, va_gid
) ? vap
->va_gid 
: -1); 
6968         if (VATTR_IS_ACTIVE(vap
, va_acl
)) { 
6969                 mac_vnode_notify_setacl(ctx
, vp
, vap
->va_acl
); 
6978  * Change mode of a file given a path name. 
6980  * Returns:     0                       Success 
6981  *              namei:???               [anything namei can return] 
6982  *              chmod_vnode:???         [anything chmod_vnode can return] 
6985 chmodat(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr 
*vap
, 
6986     int fd
, int flag
, enum uio_seg segflg
) 
6988         struct nameidata nd
; 
6991         follow 
= (flag 
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW 
: FOLLOW
; 
6992         NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow 
| AUDITVNPATH1
, 
6994         if ((error 
= nameiat(&nd
, fd
))) { 
6997         error 
= chmod_vnode(ctx
, nd
.ni_vp
, vap
); 
6998         vnode_put(nd
.ni_vp
); 
7004  * chmod_extended: Change the mode of a file given a path name; with extended 
7005  * argument list (including extended security (ACL)). 
7007  * Parameters:  p                       Process requesting to change file mode 
7008  *              uap                     User argument descriptor (see below) 
7011  * Indirect:    uap->path               Path to object (same as 'chmod') 
7012  *              uap->uid                UID to set 
7013  *              uap->gid                GID to set 
7014  *              uap->mode               File mode to set (same as 'chmod') 
7015  *              uap->xsecurity          ACL to set (or delete) 
7017  * Returns:     0                       Success 
7020  * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order. 
7022  * XXX:         We should enumerate the possible errno values here, and where 
7023  *              in the code they originated. 
7026 chmod_extended(__unused proc_t p
, struct chmod_extended_args 
*uap
, __unused 
int32_t *retval
) 
7029         struct vnode_attr va
; 
7030         kauth_filesec_t xsecdst
; 
7032         AUDIT_ARG(owner
, uap
->uid
, uap
->gid
); 
7035         if (uap
->mode 
!= -1) { 
7036                 VATTR_SET(&va
, va_mode
, uap
->mode 
& ALLPERMS
); 
7038         if (uap
->uid 
!= KAUTH_UID_NONE
) { 
7039                 VATTR_SET(&va
, va_uid
, uap
->uid
); 
7041         if (uap
->gid 
!= KAUTH_GID_NONE
) { 
7042                 VATTR_SET(&va
, va_gid
, uap
->gid
); 
7046         switch (uap
->xsecurity
) { 
7047         /* explicit remove request */ 
7048         case CAST_USER_ADDR_T((void *)1):       /* _FILESEC_REMOVE_ACL */ 
7049                 VATTR_SET(&va
, va_acl
, NULL
); 
7052         case USER_ADDR_NULL
: 
7055                 if ((error 
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) { 
7058                 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
); 
7059                 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va
.va_acl
->acl_entrycount
); 
7062         error 
= chmodat(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 0, 
7065         if (xsecdst 
!= NULL
) { 
7066                 kauth_filesec_free(xsecdst
); 
7072  * Returns:     0                       Success 
7073  *              chmodat:???             [anything chmodat can return] 
7076 fchmodat_internal(vfs_context_t ctx
, user_addr_t path
, int mode
, int fd
, 
7077     int flag
, enum uio_seg segflg
) 
7079         struct vnode_attr va
; 
7082         VATTR_SET(&va
, va_mode
, mode 
& ALLPERMS
); 
7084         return chmodat(ctx
, path
, &va
, fd
, flag
, segflg
); 
7088 chmod(__unused proc_t p
, struct chmod_args 
*uap
, __unused 
int32_t *retval
) 
7090         return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
, 
7091                    AT_FDCWD
, 0, UIO_USERSPACE
); 
7095 fchmodat(__unused proc_t p
, struct fchmodat_args 
*uap
, __unused 
int32_t *retval
) 
7097         if (uap
->flag 
& ~AT_SYMLINK_NOFOLLOW
) { 
7101         return fchmodat_internal(vfs_context_current(), uap
->path
, uap
->mode
, 
7102                    uap
->fd
, uap
->flag
, UIO_USERSPACE
); 
7106  * Change mode of a file given a file descriptor. 
7109 fchmod1(__unused proc_t p
, int fd
, struct vnode_attr 
*vap
) 
7116         if ((error 
= file_vnode(fd
, &vp
)) != 0) { 
7119         if ((error 
= vnode_getwithref(vp
)) != 0) { 
7123         AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
); 
7125         error 
= chmod_vnode(vfs_context_current(), vp
, vap
); 
7126         (void)vnode_put(vp
); 
7133  * fchmod_extended: Change mode of a file given a file descriptor; with 
7134  * extended argument list (including extended security (ACL)). 
7136  * Parameters:    p                       Process requesting to change file mode 
7137  *                uap                     User argument descriptor (see below) 
7140  * Indirect:      uap->mode               File mode to set (same as 'chmod') 
7141  *                uap->uid                UID to set 
7142  *                uap->gid                GID to set 
7143  *                uap->xsecurity          ACL to set (or delete) 
7144  *                uap->fd                 File descriptor of file to change mode 
7146  * Returns:        0                      Success 
7151 fchmod_extended(proc_t p
, struct fchmod_extended_args 
*uap
, __unused 
int32_t *retval
) 
7154         struct vnode_attr va
; 
7155         kauth_filesec_t xsecdst
; 
7157         AUDIT_ARG(owner
, uap
->uid
, uap
->gid
); 
7160         if (uap
->mode 
!= -1) { 
7161                 VATTR_SET(&va
, va_mode
, uap
->mode 
& ALLPERMS
); 
7163         if (uap
->uid 
!= KAUTH_UID_NONE
) { 
7164                 VATTR_SET(&va
, va_uid
, uap
->uid
); 
7166         if (uap
->gid 
!= KAUTH_GID_NONE
) { 
7167                 VATTR_SET(&va
, va_gid
, uap
->gid
); 
7171         switch (uap
->xsecurity
) { 
7172         case USER_ADDR_NULL
: 
7173                 VATTR_SET(&va
, va_acl
, NULL
); 
7175         case CAST_USER_ADDR_T((void *)1):       /* _FILESEC_REMOVE_ACL */ 
7176                 VATTR_SET(&va
, va_acl
, NULL
); 
7179         case CAST_USER_ADDR_T(-1): 
7182                 if ((error 
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) { 
7185                 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
); 
7188         error 
= fchmod1(p
, uap
->fd
, &va
); 
7191         switch (uap
->xsecurity
) { 
7192         case USER_ADDR_NULL
: 
7193         case CAST_USER_ADDR_T(-1): 
7196                 if (xsecdst 
!= NULL
) { 
7197                         kauth_filesec_free(xsecdst
); 
7204 fchmod(proc_t p
, struct fchmod_args 
*uap
, __unused 
int32_t *retval
) 
7206         struct vnode_attr va
; 
7209         VATTR_SET(&va
, va_mode
, uap
->mode 
& ALLPERMS
); 
7211         return fchmod1(p
, uap
->fd
, &va
); 
7216  * Set ownership given a path name. 
7220 fchownat_internal(vfs_context_t ctx
, int fd
, user_addr_t path
, uid_t uid
, 
7221     gid_t gid
, int flag
, enum uio_seg segflg
) 
7224         struct vnode_attr va
; 
7226         struct nameidata nd
; 
7228         kauth_action_t action
; 
7230         AUDIT_ARG(owner
, uid
, gid
); 
7232         follow 
= (flag 
& AT_SYMLINK_NOFOLLOW
) ? NOFOLLOW 
: FOLLOW
; 
7233         NDINIT(&nd
, LOOKUP
, OP_SETATTR
, follow 
| AUDITVNPATH1
, segflg
, 
7235         error 
= nameiat(&nd
, fd
); 
7244         if (uid 
!= (uid_t
)VNOVAL
) { 
7245                 VATTR_SET(&va
, va_uid
, uid
); 
7247         if (gid 
!= (gid_t
)VNOVAL
) { 
7248                 VATTR_SET(&va
, va_gid
, gid
); 
7252         error 
= mac_vnode_check_setowner(ctx
, vp
, uid
, gid
); 
7258         /* preflight and authorize attribute changes */ 
7259         if ((error 
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) { 
7262         if (action 
&& ((error 
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) { 
7265         error 
= vnode_setattr(vp
, &va
, ctx
); 
7269                 mac_vnode_notify_setowner(ctx
, vp
, uid
, gid
); 
7275          * EACCES is only allowed from namei(); permissions failure should 
7276          * return EPERM, so we need to translate the error code. 
7278         if (error 
== EACCES
) { 
7287 chown(__unused proc_t p
, struct chown_args 
*uap
, __unused 
int32_t *retval
) 
7289         return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
, 
7290                    uap
->uid
, uap
->gid
, 0, UIO_USERSPACE
); 
7294 lchown(__unused proc_t p
, struct lchown_args 
*uap
, __unused 
int32_t *retval
) 
7296         return fchownat_internal(vfs_context_current(), AT_FDCWD
, uap
->path
, 
7297                    uap
->owner
, uap
->group
, AT_SYMLINK_NOFOLLOW
, UIO_USERSPACE
); 
7301 fchownat(__unused proc_t p
, struct fchownat_args 
*uap
, __unused 
int32_t *retval
) 
7303         if (uap
->flag 
& ~AT_SYMLINK_NOFOLLOW
) { 
7307         return fchownat_internal(vfs_context_current(), uap
->fd
, uap
->path
, 
7308                    uap
->uid
, uap
->gid
, uap
->flag
, UIO_USERSPACE
); 
7312  * Set ownership given a file descriptor. 
7316 fchown(__unused proc_t p
, struct fchown_args 
*uap
, __unused 
int32_t *retval
) 
7318         struct vnode_attr va
; 
7319         vfs_context_t ctx 
= vfs_context_current(); 
7322         kauth_action_t action
; 
7324         AUDIT_ARG(owner
, uap
->uid
, uap
->gid
); 
7325         AUDIT_ARG(fd
, uap
->fd
); 
7327         if ((error 
= file_vnode(uap
->fd
, &vp
))) { 
7331         if ((error 
= vnode_getwithref(vp
))) { 
7335         AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
); 
7338         if (uap
->uid 
!= VNOVAL
) { 
7339                 VATTR_SET(&va
, va_uid
, uap
->uid
); 
7341         if (uap
->gid 
!= VNOVAL
) { 
7342                 VATTR_SET(&va
, va_gid
, uap
->gid
); 
7346         /* chown calls are not allowed for resource forks. */ 
7347         if (vp
->v_flag 
& VISNAMEDSTREAM
) { 
7354         error 
= mac_vnode_check_setowner(ctx
, vp
, uap
->uid
, uap
->gid
); 
7360         /* preflight and authorize attribute changes */ 
7361         if ((error 
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) { 
7364         if (action 
&& ((error 
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) { 
7365                 if (error 
== EACCES
) { 
7370         error 
= vnode_setattr(vp
, &va
, ctx
); 
7374                 mac_vnode_notify_setowner(ctx
, vp
, uap
->uid
, uap
->gid
); 
7379         (void)vnode_put(vp
); 
7385 getutimes(user_addr_t usrtvp
, struct timespec 
*tsp
) 
7389         if (usrtvp 
== USER_ADDR_NULL
) { 
7390                 struct timeval old_tv
; 
7391                 /* XXX Y2038 bug because of microtime argument */ 
7393                 TIMEVAL_TO_TIMESPEC(&old_tv
, &tsp
[0]); 
7396                 if (IS_64BIT_PROCESS(current_proc())) { 
7397                         struct user64_timeval tv
[2]; 
7398                         error 
= copyin(usrtvp
, (void *)tv
, sizeof(tv
)); 
7402                         TIMEVAL64_TO_TIMESPEC(&tv
[0], &tsp
[0]); 
7403                         TIMEVAL64_TO_TIMESPEC(&tv
[1], &tsp
[1]); 
7405                         struct user32_timeval tv
[2]; 
7406                         error 
= copyin(usrtvp
, (void *)tv
, sizeof(tv
)); 
7410                         TIMEVAL_TO_TIMESPEC(&tv
[0], &tsp
[0]); 
7411                         TIMEVAL_TO_TIMESPEC(&tv
[1], &tsp
[1]); 
7418 setutimes(vfs_context_t ctx
, vnode_t vp
, const struct timespec 
*ts
, 
7422         struct vnode_attr va
; 
7423         kauth_action_t action
; 
7425         AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
); 
7428         VATTR_SET(&va
, va_access_time
, ts
[0]); 
7429         VATTR_SET(&va
, va_modify_time
, ts
[1]); 
7431                 va
.va_vaflags 
|= VA_UTIMES_NULL
; 
7435         /* utimes calls are not allowed for resource forks. */ 
7436         if (vp
->v_flag 
& VISNAMEDSTREAM
) { 
7443         error 
= mac_vnode_check_setutimes(ctx
, vp
, ts
[0], ts
[1]); 
7448         if ((error 
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) { 
7449                 if (!nullflag 
&& error 
== EACCES
) { 
7455         /* since we may not need to auth anything, check here */ 
7456         if ((action 
!= 0) && ((error 
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) { 
7457                 if (!nullflag 
&& error 
== EACCES
) { 
7462         error 
= vnode_setattr(vp
, &va
, ctx
); 
7466                 mac_vnode_notify_setutimes(ctx
, vp
, ts
[0], ts
[1]); 
7475  * Set the access and modification times of a file. 
7479 utimes(__unused proc_t p
, struct utimes_args 
*uap
, __unused 
int32_t *retval
) 
7481         struct timespec ts
[2]; 
7484         struct nameidata nd
; 
7485         vfs_context_t ctx 
= vfs_context_current(); 
7488          * AUDIT: Needed to change the order of operations to do the 
7489          * name lookup first because auditing wants the path. 
7491         NDINIT(&nd
, LOOKUP
, OP_SETATTR
, FOLLOW 
| AUDITVNPATH1
, 
7492             UIO_USERSPACE
, uap
->path
, ctx
); 
7500          * Fetch the user-supplied time.  If usrtvp is USER_ADDR_NULL, we fetch 
7501          * the current time instead. 
7504         if ((error 
= getutimes(usrtvp
, ts
)) != 0) { 
7508         error 
= setutimes(ctx
, nd
.ni_vp
, ts
, usrtvp 
== USER_ADDR_NULL
); 
7511         vnode_put(nd
.ni_vp
); 
7516  * Set the access and modification times of a file. 
7520 futimes(__unused proc_t p
, struct futimes_args 
*uap
, __unused 
int32_t *retval
) 
7522         struct timespec ts
[2]; 
7527         AUDIT_ARG(fd
, uap
->fd
); 
7529         if ((error 
= getutimes(usrtvp
, ts
)) != 0) { 
7532         if ((error 
= file_vnode(uap
->fd
, &vp
)) != 0) { 
7535         if ((error 
= vnode_getwithref(vp
))) { 
7540         error 
=  setutimes(vfs_context_current(), vp
, ts
, usrtvp 
== 0); 
7547  * Truncate a file given its path name. 
7551 truncate(proc_t p
, struct truncate_args 
*uap
, __unused 
int32_t *retval
) 
7554         struct vnode_attr va
; 
7555         vfs_context_t ctx 
= vfs_context_current(); 
7557         struct nameidata nd
; 
7558         kauth_action_t action
; 
7561         if (uap
->length 
< 0) { 
7565         fsize_limit 
= proc_limitgetcur(p
, RLIMIT_FSIZE
, TRUE
); 
7566         if ((rlim_t
)uap
->length 
> fsize_limit
) { 
7567                 psignal(p
, SIGXFSZ
); 
7571         NDINIT(&nd
, LOOKUP
, OP_TRUNCATE
, FOLLOW 
| AUDITVNPATH1
, 
7572             UIO_USERSPACE
, uap
->path
, ctx
); 
7573         if ((error 
= namei(&nd
))) { 
7581         VATTR_SET(&va
, va_data_size
, uap
->length
); 
7584         error 
= mac_vnode_check_truncate(ctx
, NOCRED
, vp
); 
7590         if ((error 
= vnode_authattr(vp
, &va
, &action
, ctx
)) != 0) { 
7593         if ((action 
!= 0) && ((error 
= vnode_authorize(vp
, NULL
, action
, ctx
)) != 0)) { 
7596         error 
= vnode_setattr(vp
, &va
, ctx
); 
7600                 mac_vnode_notify_truncate(ctx
, NOCRED
, vp
); 
7610  * Truncate a file given a file descriptor. 
7614 ftruncate(proc_t p
, struct ftruncate_args 
*uap
, int32_t *retval
) 
7616         vfs_context_t ctx 
= vfs_context_current(); 
7617         struct vnode_attr va
; 
7619         struct fileproc 
*fp
; 
7624         AUDIT_ARG(fd
, uap
->fd
); 
7625         if (uap
->length 
< 0) { 
7629         fsize_limit 
= proc_limitgetcur(p
, RLIMIT_FSIZE
, TRUE
); 
7630         if ((rlim_t
)uap
->length 
> fsize_limit
) { 
7631                 psignal(p
, SIGXFSZ
); 
7635         if ((error 
= fp_lookup(p
, fd
, &fp
, 0))) { 
7639         switch (FILEGLOB_DTYPE(fp
->fp_glob
)) { 
7641                 error 
= pshm_truncate(p
, fp
, uap
->fd
, uap
->length
, retval
); 
7650         vp 
= (vnode_t
)fp
->fp_glob
->fg_data
; 
7652         if ((fp
->fp_glob
->fg_flag 
& FWRITE
) == 0) { 
7653                 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
); 
7658         if ((error 
= vnode_getwithref(vp
)) != 0) { 
7662         AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
); 
7665         error 
= mac_vnode_check_truncate(ctx
, 
7666             fp
->fp_glob
->fg_cred
, vp
); 
7668                 (void)vnode_put(vp
); 
7673         VATTR_SET(&va
, va_data_size
, uap
->length
); 
7674         error 
= vnode_setattr(vp
, &va
, ctx
); 
7678                 mac_vnode_notify_truncate(ctx
, fp
->fp_glob
->fg_cred
, vp
); 
7682         (void)vnode_put(vp
); 
7690  * Sync an open file with synchronized I/O _file_ integrity completion 
7694 fsync(proc_t p
, struct fsync_args 
*uap
, __unused 
int32_t *retval
) 
7696         __pthread_testcancel(1); 
7697         return fsync_common(p
, uap
, MNT_WAIT
); 
7702  * Sync an open file with synchronized I/O _file_ integrity completion 
7704  * Notes:       This is a legacy support function that does not test for 
7705  *              thread cancellation points. 
7709 fsync_nocancel(proc_t p
, struct fsync_nocancel_args 
*uap
, __unused 
int32_t *retval
) 
7711         return fsync_common(p
, (struct fsync_args 
*)uap
, MNT_WAIT
); 
7716  * Sync an open file with synchronized I/O _data_ integrity completion 
7720 fdatasync(proc_t p
, struct fdatasync_args 
*uap
, __unused 
int32_t *retval
) 
7722         __pthread_testcancel(1); 
7723         return fsync_common(p
, (struct fsync_args 
*)uap
, MNT_DWAIT
); 
7730  * Common fsync code to support both synchronized I/O file integrity completion 
7731  * (normal fsync) and synchronized I/O data integrity completion (fdatasync). 
7733  * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which 
7734  * will only guarantee that the file data contents are retrievable.  If 
7735  * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also 
7736  * includes additional metadata unnecessary for retrieving the file data 
7737  * contents, such as atime, mtime, ctime, etc., also be committed to stable 
7740  * Parameters:  p                               The process 
7741  *              uap->fd                         The descriptor to synchronize 
7742  *              flags                           The data integrity flags 
7744  * Returns:     int                             Success 
7745  *      fp_getfvp:EBADF                         Bad file descriptor 
7746  *      fp_getfvp:ENOTSUP                       fd does not refer to a vnode 
7747  *      VNOP_FSYNC:???                          unspecified 
7749  * Notes:       We use struct fsync_args because it is a short name, and all 
7750  *              caller argument structures are otherwise identical. 
7753 fsync_common(proc_t p
, struct fsync_args 
*uap
, int flags
) 
7756         struct fileproc 
*fp
; 
7757         vfs_context_t ctx 
= vfs_context_current(); 
7760         AUDIT_ARG(fd
, uap
->fd
); 
7762         if ((error 
= fp_getfvp(p
, uap
->fd
, &fp
, &vp
))) { 
7765         if ((error 
= vnode_getwithref(vp
))) { 
7770         AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
); 
7772         error 
= VNOP_FSYNC(vp
, flags
, ctx
); 
7775         /* Sync resource fork shadow file if necessary. */ 
7777             (vp
->v_flag 
& VISNAMEDSTREAM
) && 
7778             (vp
->v_parent 
!= NULLVP
) && 
7779             vnode_isshadow(vp
) && 
7780             (fp
->fp_glob
->fg_flag 
& FWASWRITTEN
)) { 
7781                 (void) vnode_flushnamedstream(vp
->v_parent
, vp
, ctx
); 
7785         (void)vnode_put(vp
); 
7791  * Duplicate files.  Source must be a file, target must be a file or 
7794  * XXX Copyfile authorisation checking is woefully inadequate, and will not 
7795  *     perform inheritance correctly. 
7799 copyfile(__unused proc_t p
, struct copyfile_args 
*uap
, __unused 
int32_t *retval
) 
7801         vnode_t tvp
, fvp
, tdvp
, sdvp
; 
7802         struct nameidata fromnd
, tond
; 
7804         vfs_context_t ctx 
= vfs_context_current(); 
7806         struct filedesc 
*fdp 
= (vfs_context_proc(ctx
))->p_fd
; 
7807         struct vnode_attr va
; 
7810         /* Check that the flags are valid. */ 
7812         if (uap
->flags 
& ~CPF_MASK
) { 
7816         NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, AUDITVNPATH1
, 
7817             UIO_USERSPACE
, uap
->from
, ctx
); 
7818         if ((error 
= namei(&fromnd
))) { 
7823         NDINIT(&tond
, CREATE
, OP_LINK
, 
7824             LOCKPARENT 
| LOCKLEAF 
| NOCACHE 
| SAVESTART 
| AUDITVNPATH2 
| CN_NBMOUNTLOOK
, 
7825             UIO_USERSPACE
, uap
->to
, ctx
); 
7826         if ((error 
= namei(&tond
))) { 
7833                 if (!(uap
->flags 
& CPF_OVERWRITE
)) { 
7839         if (fvp
->v_type 
== VDIR 
|| (tvp 
&& tvp
->v_type 
== VDIR
)) { 
7844         /* This calls existing MAC hooks for open  */ 
7845         if ((error 
= vn_authorize_open_existing(fvp
, &fromnd
.ni_cnd
, FREAD
, ctx
, 
7852                  * See unlinkat_internal for an explanation of the potential 
7853                  * ENOENT from the MAC hook but the gist is that the MAC hook 
7854                  * can fail because vn_getpath isn't able to return the full 
7855                  * path. We choose to ignore this failure. 
7857                 error 
= vn_authorize_unlink(tdvp
, tvp
, &tond
.ni_cnd
, ctx
, NULL
); 
7858                 if (error 
&& error 
!= ENOENT
) { 
7866         VATTR_SET(&va
, va_type
, fvp
->v_type
); 
7867         /* Mask off all but regular access permissions */ 
7868         VATTR_SET(&va
, va_mode
, 
7869             ((((uap
->mode 
& ~fdp
->fd_cmask
) & ALLPERMS
) & ~S_ISTXT
) & ACCESSPERMS
)); 
7870         error 
= mac_vnode_check_create(ctx
, tdvp
, &tond
.ni_cnd
, &va
); 
7874 #endif /* CONFIG_MACF */ 
7876         if ((error 
= vnode_authorize(tdvp
, NULL
, KAUTH_VNODE_ADD_FILE
, ctx
)) != 0) { 
7884          * If source is the same as the destination (that is the 
7885          * same inode number) then there is nothing to do. 
7886          * (fixed to have POSIX semantics - CSM 3/2/98) 
7892                 error 
= VNOP_COPYFILE(fvp
, tdvp
, tvp
, &tond
.ni_cnd
, uap
->mode
, uap
->flags
, ctx
); 
7895         sdvp 
= tond
.ni_startdir
; 
7897          * nameidone has to happen before we vnode_put(tdvp) 
7898          * since it may need to release the fs_nodelock on the tdvp 
7918 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1 
7921  * Helper function for doing clones. The caller is expected to provide an 
7922  * iocounted source vnode and release it. 
7925 clonefile_internal(vnode_t fvp
, boolean_t data_read_authorised
, int dst_dirfd
, 
7926     user_addr_t dst
, uint32_t flags
, vfs_context_t ctx
) 
7929         struct nameidata tond
; 
7932         boolean_t free_src_acl
; 
7933         boolean_t attr_cleanup
; 
7935         kauth_action_t action
; 
7936         struct componentname 
*cnp
; 
7938         struct vnode_attr va
; 
7939         struct vnode_attr nva
; 
7940         uint32_t vnop_flags
; 
7942         v_type 
= vnode_vtype(fvp
); 
7947                 action 
= KAUTH_VNODE_ADD_FILE
; 
7950                 if (vnode_isvroot(fvp
) || vnode_ismount(fvp
) || 
7951                     fvp
->v_mountedhere
) { 
7954                 action 
= KAUTH_VNODE_ADD_SUBDIRECTORY
; 
7960         AUDIT_ARG(fd2
, dst_dirfd
); 
7961         AUDIT_ARG(value32
, flags
); 
7963         follow 
= (flags 
& CLONE_NOFOLLOW
) ? NOFOLLOW 
: FOLLOW
; 
7964         NDINIT(&tond
, CREATE
, OP_LINK
, follow 
| WANTPARENT 
| AUDITVNPATH2
, 
7965             UIO_USERSPACE
, dst
, ctx
); 
7966         if ((error 
= nameiat(&tond
, dst_dirfd
))) { 
7973         free_src_acl 
= FALSE
; 
7974         attr_cleanup 
= FALSE
; 
7981         if (vnode_mount(tdvp
) != vnode_mount(fvp
)) { 
7987         if ((error 
= mac_vnode_check_clone(ctx
, tdvp
, fvp
, cnp
))) { 
7991         if ((error 
= vnode_authorize(tdvp
, NULL
, action
, ctx
))) { 
7995         action 
= KAUTH_VNODE_GENERIC_READ_BITS
; 
7996         if (data_read_authorised
) { 
7997                 action 
&= ~KAUTH_VNODE_READ_DATA
; 
7999         if ((error 
= vnode_authorize(fvp
, NULL
, action
, ctx
))) { 
8004          * certain attributes may need to be changed from the source, we ask for 
8005          * those here with the exception of source file's ACL. The clone file 
8006          * will inherit the target directory's ACL. 
8009         VATTR_WANTED(&va
, va_uid
); 
8010         VATTR_WANTED(&va
, va_gid
); 
8011         VATTR_WANTED(&va
, va_mode
); 
8012         VATTR_WANTED(&va
, va_flags
); 
8014         if ((error 
= vnode_getattr(fvp
, &va
, ctx
)) != 0) { 
8019         VATTR_SET(&nva
, va_type
, v_type
); 
8020         if (VATTR_IS_SUPPORTED(&va
, va_acl
) && va
.va_acl 
!= NULL
) { 
8021                 VATTR_SET(&nva
, va_acl
, va
.va_acl
); 
8022                 free_src_acl 
= TRUE
; 
8025         /* Handle ACL inheritance, initialize vap. */ 
8026         if (v_type 
== VLNK
) { 
8027                 error 
= vnode_authattr_new(tdvp
, &nva
, 0, ctx
); 
8029                 error 
= vn_attribute_prepare(tdvp
, &nva
, &defaulted
, ctx
); 
8033                 attr_cleanup 
= TRUE
; 
8036         vnop_flags 
= VNODE_CLONEFILE_DEFAULT
; 
8038          * We've got initial values for all security parameters, 
8039          * If we are superuser, then we can change owners to be the 
8040          * same as the source. Both superuser and the owner have default 
8041          * WRITE_SECURITY privileges so all other fields can be taken 
8042          * from source as well. 
8044         if (!(flags 
& CLONE_NOOWNERCOPY
) && vfs_context_issuser(ctx
)) { 
8045                 if (VATTR_IS_SUPPORTED(&va
, va_uid
)) { 
8046                         VATTR_SET(&nva
, va_uid
, va
.va_uid
); 
8048                 if (VATTR_IS_SUPPORTED(&va
, va_gid
)) { 
8049                         VATTR_SET(&nva
, va_gid
, va
.va_gid
); 
8052                 vnop_flags 
|= VNODE_CLONEFILE_NOOWNERCOPY
; 
8055         if (VATTR_IS_SUPPORTED(&va
, va_mode
)) { 
8056                 VATTR_SET(&nva
, va_mode
, va
.va_mode
); 
8058         if (VATTR_IS_SUPPORTED(&va
, va_flags
)) { 
8059                 VATTR_SET(&nva
, va_flags
, 
8060                     ((va
.va_flags 
& ~(UF_DATAVAULT 
| SF_RESTRICTED
)) | /* Turn off from source */ 
8061                     (nva
.va_flags 
& (UF_DATAVAULT 
| SF_RESTRICTED
)))); 
8064         error 
= VNOP_CLONEFILE(fvp
, tdvp
, &tvp
, cnp
, &nva
, vnop_flags
, ctx
); 
8066         if (!error 
&& tvp
) { 
8067                 int     update_flags 
= 0; 
8070 #endif /* CONFIG_FSE */ 
8073                  * If some of the requested attributes weren't handled by the 
8074                  * VNOP, use our fallback code. 
8076                 if (!VATTR_ALL_SUPPORTED(&nva
)) { 
8077                         (void)vnode_setattr_fallback(tvp
, &nva
, ctx
); 
8081                 (void)vnode_label(vnode_mount(tvp
), tdvp
, tvp
, cnp
, 
8082                     VNODE_LABEL_CREATE
, ctx
); 
8085                 // Make sure the name & parent pointers are hooked up 
8086                 if (tvp
->v_name 
== NULL
) { 
8087                         update_flags 
|= VNODE_UPDATE_NAME
; 
8089                 if (tvp
->v_parent 
== NULLVP
) { 
8090                         update_flags 
|= VNODE_UPDATE_PARENT
; 
8094                         (void)vnode_update_identity(tvp
, tdvp
, cnp
->cn_nameptr
, 
8095                             cnp
->cn_namelen
, cnp
->cn_hash
, update_flags
); 
8099                 switch (vnode_vtype(tvp
)) { 
8103                         fsevent 
= FSE_CREATE_FILE
; 
8106                         fsevent 
= FSE_CREATE_DIR
; 
8112                 if (need_fsevent(fsevent
, tvp
)) { 
8114                          * The following is a sequence of three explicit events. 
8115                          * A pair of FSE_CLONE events representing the source and destination 
8116                          * followed by an FSE_CREATE_[FILE | DIR] for the destination. 
8117                          * fseventsd may coalesce the destination clone and create events 
8118                          * into a single event resulting in the following sequence for a client 
8120                          * FSE_CLONE | FSE_CREATE (dst) 
8122                         add_fsevent(FSE_CLONE
, ctx
, FSE_ARG_VNODE
, fvp
, FSE_ARG_VNODE
, tvp
, 
8124                         add_fsevent(fsevent
, ctx
, FSE_ARG_VNODE
, tvp
, 
8127 #endif /* CONFIG_FSE */ 
8132                 vn_attribute_cleanup(&nva
, defaulted
); 
8134         if (free_src_acl 
&& va
.va_acl
) { 
8135                 kauth_acl_free(va
.va_acl
); 
8146  * clone files or directories, target must not exist. 
8150 clonefileat(__unused proc_t p
, struct clonefileat_args 
*uap
, 
8151     __unused 
int32_t *retval
) 
8154         struct nameidata fromnd
; 
8157         vfs_context_t ctx 
= vfs_context_current(); 
8159         /* Check that the flags are valid. */ 
8160         if (uap
->flags 
& ~(CLONE_NOFOLLOW 
| CLONE_NOOWNERCOPY
)) { 
8164         AUDIT_ARG(fd
, uap
->src_dirfd
); 
8166         follow 
= (uap
->flags 
& CLONE_NOFOLLOW
) ? NOFOLLOW 
: FOLLOW
; 
8167         NDINIT(&fromnd
, LOOKUP
, OP_COPYFILE
, follow 
| AUDITVNPATH1
, 
8168             UIO_USERSPACE
, uap
->src
, ctx
); 
8169         if ((error 
= nameiat(&fromnd
, uap
->src_dirfd
))) { 
8176         error 
= clonefile_internal(fvp
, FALSE
, uap
->dst_dirfd
, uap
->dst
, 
8184 fclonefileat(__unused proc_t p
, struct fclonefileat_args 
*uap
, 
8185     __unused 
int32_t *retval
) 
8188         struct fileproc 
*fp
; 
8190         vfs_context_t ctx 
= vfs_context_current(); 
8192         /* Check that the flags are valid. */ 
8193         if (uap
->flags 
& ~(CLONE_NOFOLLOW 
| CLONE_NOOWNERCOPY
)) { 
8197         AUDIT_ARG(fd
, uap
->src_fd
); 
8198         error 
= fp_getfvp(p
, uap
->src_fd
, &fp
, &fvp
); 
8203         if ((fp
->fp_glob
->fg_flag 
& FREAD
) == 0) { 
8204                 AUDIT_ARG(vnpath_withref
, fvp
, ARG_VNODE1
); 
8209         if ((error 
= vnode_getwithref(fvp
))) { 
8213         AUDIT_ARG(vnpath
, fvp
, ARG_VNODE1
); 
8215         error 
= clonefile_internal(fvp
, TRUE
, uap
->dst_dirfd
, uap
->dst
, 
8220         file_drop(uap
->src_fd
); 
8225 rename_submounts_callback(mount_t mp
, void *arg
) 
8228         mount_t pmp 
= (mount_t
)arg
; 
8229         int prefix_len 
= (int)strlen(pmp
->mnt_vfsstat
.f_mntonname
); 
8231         if (strncmp(mp
->mnt_vfsstat
.f_mntonname
, pmp
->mnt_vfsstat
.f_mntonname
, prefix_len
) != 0) { 
8235         if (mp
->mnt_vfsstat
.f_mntonname
[prefix_len
] != '/') { 
8239         if ((error 
= vfs_busy(mp
, LK_NOWAIT
))) { 
8240                 printf("vfs_busy failed with %d for %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
); 
8244         int pathlen 
= MAXPATHLEN
; 
8245         if ((error 
= vn_getpath_ext(mp
->mnt_vnodecovered
, NULL
, mp
->mnt_vfsstat
.f_mntonname
, &pathlen
, VN_GETPATH_FSENTER
))) { 
8246                 printf("vn_getpath_ext failed with %d for mnt_vnodecovered of %s\n", error
, mp
->mnt_vfsstat
.f_mntonname
); 
8255  * Rename files.  Source and destination must either both be directories, 
8256  * or both not be directories.  If target is a directory, it must be empty. 
8260 renameat_internal(vfs_context_t ctx
, int fromfd
, user_addr_t from
, 
8261     int tofd
, user_addr_t to
, int segflg
, vfs_rename_flags_t flags
) 
8263         if (flags 
& ~VFS_RENAME_FLAGS_MASK
) { 
8267         if (ISSET(flags
, VFS_RENAME_SWAP
) && ISSET(flags
, VFS_RENAME_EXCL
)) { 
8274         struct nameidata 
*fromnd
, *tond
; 
8282         const char *oname 
= NULL
; 
8283         char *from_name 
= NULL
, *to_name 
= NULL
; 
8284         char *from_name_no_firmlink 
= NULL
, *to_name_no_firmlink 
= NULL
; 
8285         int from_len 
= 0, to_len 
= 0; 
8286         int from_len_no_firmlink 
= 0, to_len_no_firmlink 
= 0; 
8287         int holding_mntlock
; 
8288         int vn_authorize_skipped
; 
8289         mount_t locked_mp 
= NULL
; 
8290         vnode_t oparent 
= NULLVP
; 
8292         fse_info from_finfo
, to_finfo
; 
8294         int from_truncated 
= 0, to_truncated 
= 0; 
8295         int from_truncated_no_firmlink 
= 0, to_truncated_no_firmlink 
= 0; 
8297         struct vnode_attr 
*fvap
, *tvap
; 
8299         /* carving out a chunk for structs that are too big to be on stack. */ 
8301                 struct nameidata from_node
, to_node
; 
8302                 struct vnode_attr fv_attr
, tv_attr
; 
8304         __rename_data 
= kheap_alloc(KHEAP_TEMP
, sizeof(*__rename_data
), Z_WAITOK
); 
8305         fromnd 
= &__rename_data
->from_node
; 
8306         tond 
= &__rename_data
->to_node
; 
8308         holding_mntlock 
= 0; 
8317         vn_authorize_skipped 
= FALSE
; 
8319         NDINIT(fromnd
, DELETE
, OP_UNLINK
, WANTPARENT 
| AUDITVNPATH1
, 
8321         fromnd
->ni_flag 
= NAMEI_COMPOUNDRENAME
; 
8323         NDINIT(tond
, RENAME
, OP_RENAME
, WANTPARENT 
| AUDITVNPATH2 
| CN_NBMOUNTLOOK
, 
8325         tond
->ni_flag 
= NAMEI_COMPOUNDRENAME
; 
8328         if ((fromnd
->ni_flag 
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) { 
8329                 if ((error 
= nameiat(fromnd
, fromfd
))) { 
8332                 fdvp 
= fromnd
->ni_dvp
; 
8333                 fvp  
= fromnd
->ni_vp
; 
8335                 if (fvp 
&& fvp
->v_type 
== VDIR
) { 
8336                         tond
->ni_cnd
.cn_flags 
|= WILLBEDIR
; 
8340         if ((tond
->ni_flag 
& NAMEI_CONTLOOKUP
) != 0 || !continuing
) { 
8341                 if ((error 
= nameiat(tond
, tofd
))) { 
8343                          * Translate error code for rename("dir1", "dir2/."). 
8345                         if (error 
== EISDIR 
&& fvp
->v_type 
== VDIR
) { 
8350                 tdvp 
= tond
->ni_dvp
; 
8354 #if DEVELOPMENT || DEBUG 
8356          * XXX VSWAP: Check for entitlements or special flag here 
8357          * so we can restrict access appropriately. 
8359 #else /* DEVELOPMENT || DEBUG */ 
8361         if (fromnd
->ni_vp 
&& vnode_isswap(fromnd
->ni_vp
) && (ctx 
!= vfs_context_kernel())) { 
8366         if (tond
->ni_vp 
&& vnode_isswap(tond
->ni_vp
) && (ctx 
!= vfs_context_kernel())) { 
8370 #endif /* DEVELOPMENT || DEBUG */ 
8372         if (!tvp 
&& ISSET(flags
, VFS_RENAME_SWAP
)) { 
8377         if (tvp 
&& ISSET(flags
, VFS_RENAME_EXCL
)) { 
8382                  * We allow rename with VFS_RENAME_EXCL flag for an existing file which 
8383                  * has the same name as target iff the following conditions are met: 
8384                  * 1. the target file system is case insensitive 
8385                  * 2. source and target directories are the same 
8386                  * 3. source and target files are the same 
8387                  * 4. name only differs in case (determined by underlying filesystem) 
8389                 if (fvp 
!= tvp 
|| fdvp 
!= tdvp
) { 
8395                  * Assume that the target file system is case sensitive if 
8396                  * _PC_CASE_SENSITIVE selector isn't supported. 
8398                 err 
= VNOP_PATHCONF(tvp
, _PC_CASE_SENSITIVE
, &pval
, ctx
); 
8399                 if (err 
!= 0 || pval 
!= 0) { 
8405         batched 
= vnode_compound_rename_available(fdvp
); 
8408         need_event 
= need_fsevent(FSE_RENAME
, fdvp
); 
8411                         get_fse_info(fvp
, &from_finfo
, ctx
); 
8413                         error 
= vfs_get_notify_attributes(&__rename_data
->fv_attr
); 
8418                         fvap 
= &__rename_data
->fv_attr
; 
8422                         get_fse_info(tvp
, &to_finfo
, ctx
); 
8423                 } else if (batched
) { 
8424                         error 
= vfs_get_notify_attributes(&__rename_data
->tv_attr
); 
8429                         tvap 
= &__rename_data
->tv_attr
; 
8434 #endif /* CONFIG_FSE */ 
8436         has_listeners 
= kauth_authorize_fileop_has_listeners(); 
8440         if (AUDIT_RECORD_EXISTS()) { 
8445         if (need_event 
|| has_listeners
) { 
8446                 if (from_name 
== NULL
) { 
8447                         GET_PATH(from_name
); 
8450                 from_len 
= safe_getpath(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name
, MAXPATHLEN
, &from_truncated
); 
8452                 if (from_name_no_firmlink 
== NULL
) { 
8453                         GET_PATH(from_name_no_firmlink
); 
8456                 from_len_no_firmlink 
= safe_getpath_no_firmlink(fdvp
, fromnd
->ni_cnd
.cn_nameptr
, from_name_no_firmlink
, MAXPATHLEN
, &from_truncated_no_firmlink
); 
8459         if (need_event 
|| need_kpath2 
|| has_listeners
) { 
8460                 if (to_name 
== NULL
) { 
8464                 to_len 
= safe_getpath(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name
, MAXPATHLEN
, &to_truncated
); 
8466                 if (to_name_no_firmlink 
== NULL
) { 
8467                         GET_PATH(to_name_no_firmlink
); 
8470                 to_len_no_firmlink 
= safe_getpath_no_firmlink(tdvp
, tond
->ni_cnd
.cn_nameptr
, to_name_no_firmlink
, MAXPATHLEN
, &to_truncated_no_firmlink
); 
8471                 if (to_name 
&& need_kpath2
) { 
8472                         AUDIT_ARG(kpath
, to_name
, ARG_KPATH2
); 
8477                  * Claim: this check will never reject a valid rename. 
8478                  * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp. 
8479                  * Suppose fdvp and tdvp are not on the same mount. 
8480                  * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem.  If fvp is the root, 
8481                  *      then you can't move it to within another dir on the same mountpoint. 
8482                  * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction. 
8484                  * If this check passes, then we are safe to pass these vnodes to the same FS. 
8486                 if (fdvp
->v_mount 
!= tdvp
->v_mount
) { 
8490                 goto skipped_lookup
; 
8494          * If the source and destination are the same (i.e. they're 
8495          * links to the same vnode) and the target file system is 
8496          * case sensitive, then there is nothing to do. 
8498          * XXX Come back to this. 
8504                  * Note: if _PC_CASE_SENSITIVE selector isn't supported, 
8505                  * then assume that this file system is case sensitive. 
8507                 if (VNOP_PATHCONF(fvp
, _PC_CASE_SENSITIVE
, &pathconf_val
, ctx
) != 0 || 
8508                     pathconf_val 
!= 0) { 
8509                         vn_authorize_skipped 
= TRUE
; 
8515          * Allow the renaming of mount points. 
8516          * - target must not exist 
8517          * - target must reside in the same directory as source 
8518          * - union mounts cannot be renamed 
8519          * - the root fs, and tightly-linked system volumes, cannot be renamed 
8521          * XXX Handle this in VFS after a continued lookup (if we missed 
8522          * in the cache to start off) 
8524          * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so 
8525          * we'll skip past here.  The file system is responsible for 
8526          * checking that @tvp is not a descendent of @fvp and vice versa 
8527          * so it should always return EINVAL if either @tvp or @fvp is the 
8530         if ((fvp
->v_flag 
& VROOT
) && 
8531             (fvp
->v_type 
== VDIR
) && 
8533             (fvp
->v_mountedhere 
== NULL
) && 
8535             ((fvp
->v_mount
->mnt_flag 
& (MNT_UNION 
| MNT_ROOTFS
)) == 0) && 
8536             ((fvp
->v_mount
->mnt_kern_flag 
& MNTK_SYSTEM
) == 0) && 
8537             (fvp
->v_mount
->mnt_vnodecovered 
!= NULLVP
)) { 
8540                 /* switch fvp to the covered vnode */ 
8541                 coveredvp 
= fvp
->v_mount
->mnt_vnodecovered
; 
8542                 if ((vnode_getwithref(coveredvp
))) { 
8547                  * Save the 'fvp' as it is needed for vn_authorize_renamex_with_paths() 
8556          * Check for cross-device rename. 
8558         if ((fvp
->v_mount 
!= tdvp
->v_mount
) || 
8559             (tvp 
&& (fvp
->v_mount 
!= tvp
->v_mount
))) { 
8565          * If source is the same as the destination (that is the 
8566          * same inode number) then there is nothing to do... 
8567          * EXCEPT if the underlying file system supports case 
8568          * insensitivity and is case preserving.  In this case 
8569          * the file system needs to handle the special case of 
8570          * getting the same vnode as source (fvp) and target (tvp).
8572          * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE 
8573          * and _PC_CASE_PRESERVING can have this exception, and they need to 
8574          * handle the special case of getting the same vnode as target and 
8575          * source.  NOTE: Then the target is unlocked going into vnop_rename, 
8576          * so as not to cause locking problems. There is a single reference on tvp.
8578          * NOTE - that fvp == tvp also occurs if they are hard linked and 
8579          * that correct behaviour then is just to return success without doing 
8582          * XXX filesystem should take care of this itself, perhaps... 
8584         if (fvp 
== tvp 
&& fdvp 
== tdvp
) { 
8585                 if (fromnd
->ni_cnd
.cn_namelen 
== tond
->ni_cnd
.cn_namelen 
&& 
8586                     !bcmp(fromnd
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_nameptr
, 
8587                     fromnd
->ni_cnd
.cn_namelen
)) { 
8588                         vn_authorize_skipped 
= TRUE
; 
8593         if (holding_mntlock 
&& fvp
->v_mount 
!= locked_mp
) { 
8595                  * we're holding a reference and lock 
8596                  * on locked_mp, but it no longer matches 
8597                  * what we want to do... so drop our hold 
8599                 mount_unlock_renames(locked_mp
); 
8600                 mount_drop(locked_mp
, 0); 
8601                 holding_mntlock 
= 0; 
8603         if (tdvp 
!= fdvp 
&& fvp
->v_type 
== VDIR
) { 
8605                  * serialize renames that re-shape 
8606                  * the tree... if holding_mntlock is 
8607                  * set, then we're ready to go... 
8609                  * first need to drop the iocounts 
8610                  * we picked up, second take the 
8611                  * lock to serialize the access, 
8612                  * then finally start the lookup 
8613                  * process over with the lock held 
8615                 if (!holding_mntlock
) { 
8617                          * need to grab a reference on 
8618                          * the mount point before we 
8619                          * drop all the iocounts... once 
8620                          * the iocounts are gone, the mount 
8623                         locked_mp 
= fvp
->v_mount
; 
8624                         mount_ref(locked_mp
, 0); 
8627                          * nameidone has to happen before we vnode_put(tvp) 
8628                          * since it may need to release the fs_nodelock on the tvp 
8638                          * nameidone has to happen before we vnode_put(fdvp) 
8639                          * since it may need to release the fs_nodelock on the fvp 
8646                         if (mnt_fvp 
!= NULLVP
) { 
8650                         mount_lock_renames(locked_mp
); 
8651                         holding_mntlock 
= 1; 
8657                  * when we dropped the iocounts to take 
8658                  * the lock, we allowed the identity of 
8659                  * the various vnodes to change... if they did, 
8660                  * we may no longer be dealing with a rename 
8661                  * that reshapes the tree... once we're holding 
8662                  * the iocounts, the vnodes can't change type 
8663                  * so we're free to drop the lock at this point 
8666                 if (holding_mntlock
) { 
8667                         mount_unlock_renames(locked_mp
); 
8668                         mount_drop(locked_mp
, 0); 
8669                         holding_mntlock 
= 0; 
8674                 error 
= vn_authorize_renamex_with_paths(fdvp
, mntrename 
? mnt_fvp 
: fvp
, 
8675                     &fromnd
->ni_cnd
, from_name
, tdvp
, tvp
, &tond
->ni_cnd
, to_name
, ctx
, 
8678                         if (error 
== ENOENT
) { 
8679                                 if (retry_count 
< MAX_AUTHORIZE_ENOENT_RETRIES
) { 
8681                                          * We encountered a race where after doing the namei, 
8682                                          * tvp stops being valid. If so, simply re-drive the rename 
8683                                          * call from the top. 
8693         /* Release the 'mnt_fvp' now that it is no longer needed. */ 
8694         if (mnt_fvp 
!= NULLVP
) { 
8699         // save these off so we can later verify that fvp is the same 
8700         oname   
= fvp
->v_name
; 
8701         oparent 
= fvp
->v_parent
; 
8704         error 
= vn_rename(fdvp
, &fvp
, &fromnd
->ni_cnd
, fvap
, 
8705             tdvp
, &tvp
, &tond
->ni_cnd
, tvap
, 
8708         if (holding_mntlock
) { 
8710                  * we can drop our serialization 
8713                 mount_unlock_renames(locked_mp
); 
8714                 mount_drop(locked_mp
, 0); 
8715                 holding_mntlock 
= 0; 
8718                 if (error 
== EDATALESS
) { 
8720                          * If we've been here before, something has gone 
8721                          * horribly wrong and we should just get out lest 
8722                          * we spiral around the drain forever. 
8724                         if (flags 
& VFS_RENAME_DATALESS
) { 
8730                          * The object we're renaming is dataless (or has a 
8731                          * dataless descendent) and requires materialization 
8732                          * before the rename occurs.  But we're holding the 
8733                          * mount point's rename lock, so it's not safe to 
8736                          * In this case, we release the lock, perform the 
8737                          * materialization, and start the whole thing over. 
8739                         error 
= vnode_materialize_dataless_file(fvp
, 
8740                             NAMESPACE_HANDLER_RENAME_OP
); 
8744                                  * The next time around we need to tell the 
8745                                  * file system that the materialization has
8748                                 flags 
|= VFS_RENAME_DATALESS
; 
8753                 if (error 
== EKEEPLOOKING
) { 
8754                         if ((fromnd
->ni_flag 
& NAMEI_CONTLOOKUP
) == 0) { 
8755                                 if ((tond
->ni_flag 
& NAMEI_CONTLOOKUP
) == 0) { 
8756                                         panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?"); 
8760                         fromnd
->ni_vp 
= fvp
; 
8763                         goto continue_lookup
; 
8767                  * We may encounter a race in the VNOP where the destination didn't 
8768                  * exist when we did the namei, but it does by the time we go and 
8769                  * try to create the entry. In this case, we should re-drive this rename 
8770                  * call from the top again.  Currently, only HFS bubbles out ERECYCLE, 
8771                  * but other filesystems susceptible to this race could return it, too. 
8773                 if (error 
== ERECYCLE
) { 
8774                         if (retry_count 
< MAX_RENAME_ERECYCLE_RETRIES
) { 
8778                                 printf("rename retry limit due to ERECYCLE reached\n"); 
8784                  * For compound VNOPs, the authorization callback may return 
8785                  * ENOENT in case of racing hardlink lookups hitting the name 
8786                  * cache; redrive the lookup.
8788                 if (batched 
&& error 
== ENOENT
) { 
8789                         if (retry_count 
< MAX_AUTHORIZE_ENOENT_RETRIES
) { 
8798         /* call out to allow 3rd party notification of rename. 
8799          * Ignore result of kauth_authorize_fileop call. 
8801         kauth_authorize_fileop(vfs_context_ucred(ctx
), 
8802             KAUTH_FILEOP_RENAME
, 
8803             (uintptr_t)from_name
, (uintptr_t)to_name
); 
8804         if (flags 
& VFS_RENAME_SWAP
) { 
8805                 kauth_authorize_fileop(vfs_context_ucred(ctx
), 
8806                     KAUTH_FILEOP_RENAME
, 
8807                     (uintptr_t)to_name
, (uintptr_t)from_name
); 
8811         if (from_name 
!= NULL 
&& to_name 
!= NULL
) { 
8812                 if (from_truncated 
|| to_truncated
) { 
8813                         // set it here since only the from_finfo gets reported up to user space 
8814                         from_finfo
.mode 
|= FSE_TRUNCATED_PATH
; 
8818                         vnode_get_fse_info_from_vap(tvp
, &to_finfo
, tvap
); 
8821                         vnode_get_fse_info_from_vap(fvp
, &from_finfo
, fvap
); 
8825                         add_fsevent(FSE_RENAME
, ctx
, 
8826                             FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
, 
8827                             FSE_ARG_FINFO
, &from_finfo
, 
8828                             FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
, 
8829                             FSE_ARG_FINFO
, &to_finfo
, 
8831                         if (flags 
& VFS_RENAME_SWAP
) { 
8833                                  * Strictly speaking, swap is the equivalent of 
8834                                  * *three* renames.  FSEvents clients should only take 
8835                                  * the events as a hint, so we only bother reporting 
8838                                 add_fsevent(FSE_RENAME
, ctx
, 
8839                                     FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
, 
8840                                     FSE_ARG_FINFO
, &to_finfo
, 
8841                                     FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
, 
8842                                     FSE_ARG_FINFO
, &from_finfo
, 
8846                         add_fsevent(FSE_RENAME
, ctx
, 
8847                             FSE_ARG_STRING
, from_len_no_firmlink
, from_name_no_firmlink
, 
8848                             FSE_ARG_FINFO
, &from_finfo
, 
8849                             FSE_ARG_STRING
, to_len_no_firmlink
, to_name_no_firmlink
, 
8853 #endif /* CONFIG_FSE */ 
8856          * update filesystem's mount point data 
8859                 char *cp
, *pathend
, *mpname
; 
8865                 mp 
= fvp
->v_mountedhere
; 
8867                 if (vfs_busy(mp
, LK_NOWAIT
)) { 
8871                 tobuf 
= zalloc(ZV_NAMEI
); 
8873                 if (UIO_SEG_IS_USER_SPACE(segflg
)) { 
8874                         error 
= copyinstr(to
, tobuf
, MAXPATHLEN
, &len
); 
8876                         error 
= copystr((void *)to
, tobuf
, MAXPATHLEN
, &len
); 
8879                         /* find current mount point prefix */ 
8880                         pathend 
= &mp
->mnt_vfsstat
.f_mntonname
[0]; 
8881                         for (cp 
= pathend
; *cp 
!= '\0'; ++cp
) { 
8886                         /* find last component of target name */ 
8887                         for (mpname 
= cp 
= tobuf
; *cp 
!= '\0'; ++cp
) { 
8893                         /* Update f_mntonname of sub mounts */ 
8894                         vfs_iterate(0, rename_submounts_callback
, (void *)mp
); 
8896                         /* append name to prefix */ 
8897                         maxlen 
= MAXPATHLEN 
- (int)(pathend 
- mp
->mnt_vfsstat
.f_mntonname
); 
8898                         bzero(pathend
, maxlen
); 
8900                         strlcpy(pathend
, mpname
, maxlen
); 
8902                 zfree(ZV_NAMEI
, tobuf
); 
8906                 vfs_event_signal(NULL
, VQ_UPDATE
, (intptr_t)NULL
); 
8909          * fix up name & parent pointers.  note that we first 
8910          * check that fvp has the same name/parent pointers it 
8911          * had before the rename call... this is a 'weak' check 
8914          * XXX oparent and oname may not be set in the compound vnop case 
8916         if (batched 
|| (oname 
== fvp
->v_name 
&& oparent 
== fvp
->v_parent
)) { 
8919                 update_flags 
= VNODE_UPDATE_NAME
; 
8922                         update_flags 
|= VNODE_UPDATE_PARENT
; 
8925                 vnode_update_identity(fvp
, tdvp
, tond
->ni_cnd
.cn_nameptr
, tond
->ni_cnd
.cn_namelen
, tond
->ni_cnd
.cn_hash
, update_flags
); 
8929          * There are some cases (e.g. 'fvp == tvp') when vn_authorize was
8930          * skipped earlier as no actual rename was performed. 
8932         if (vn_authorize_skipped 
&& error 
== 0) { 
8933                 error 
= vn_authorize_renamex_with_paths(fdvp
, fvp
, 
8934                     &fromnd
->ni_cnd
, from_name
, tdvp
, tvp
, &tond
->ni_cnd
, to_name
, ctx
, 
8936                 if (error 
&& error 
== ENOENT
) { 
8937                         if (retry_count 
< MAX_AUTHORIZE_ENOENT_RETRIES
) { 
8943         if (to_name 
!= NULL
) { 
8944                 RELEASE_PATH(to_name
); 
8947         if (to_name_no_firmlink 
!= NULL
) { 
8948                 RELEASE_PATH(to_name_no_firmlink
); 
8949                 to_name_no_firmlink 
= NULL
; 
8951         if (from_name 
!= NULL
) { 
8952                 RELEASE_PATH(from_name
); 
8955         if (from_name_no_firmlink 
!= NULL
) { 
8956                 RELEASE_PATH(from_name_no_firmlink
); 
8957                 from_name_no_firmlink 
= NULL
; 
8959         if (holding_mntlock
) { 
8960                 mount_unlock_renames(locked_mp
); 
8961                 mount_drop(locked_mp
, 0); 
8962                 holding_mntlock 
= 0; 
8966                  * nameidone has to happen before we vnode_put(tdvp) 
8967                  * since it may need to release the fs_nodelock on the tdvp 
8978                  * nameidone has to happen before we vnode_put(fdvp) 
8979                  * since it may need to release the fs_nodelock on the fdvp 
8988         if (mnt_fvp 
!= NULLVP
) { 
8992          * If things changed after we did the namei, then we will re-drive 
8993          * this rename call from the top. 
9000         kheap_free(KHEAP_TEMP
, __rename_data
, sizeof(*__rename_data
)); 
9005 rename(__unused proc_t p
, struct rename_args 
*uap
, __unused 
int32_t *retval
) 
9007         return renameat_internal(vfs_context_current(), AT_FDCWD
, uap
->from
, 
9008                    AT_FDCWD
, uap
->to
, UIO_USERSPACE
, 0); 
9012 renameatx_np(__unused proc_t p
, struct renameatx_np_args 
*uap
, __unused 
int32_t *retval
) 
9014         return renameat_internal( 
9015                 vfs_context_current(), 
9016                 uap
->fromfd
, uap
->from
, 
9018                 UIO_USERSPACE
, uap
->flags
); 
9022 renameat(__unused proc_t p
, struct renameat_args 
*uap
, __unused 
int32_t *retval
) 
9024         return renameat_internal(vfs_context_current(), uap
->fromfd
, uap
->from
, 
9025                    uap
->tofd
, uap
->to
, UIO_USERSPACE
, 0); 
9029  * Make a directory file. 
9031  * Returns:     0                       Success 
9034  *      vnode_authorize:??? 
9039 mkdir1at(vfs_context_t ctx
, user_addr_t path
, struct vnode_attr 
*vap
, int fd
, 
9040     enum uio_seg segflg
) 
9044         int update_flags 
= 0; 
9046         struct nameidata nd
; 
9048         AUDIT_ARG(mode
, vap
->va_mode
); 
9049         NDINIT(&nd
, CREATE
, OP_MKDIR
, LOCKPARENT 
| AUDITVNPATH1
, segflg
, 
9051         nd
.ni_cnd
.cn_flags 
|= WILLBEDIR
; 
9052         nd
.ni_flag 
= NAMEI_COMPOUNDMKDIR
; 
9055         error 
= nameiat(&nd
, fd
); 
9067         batched 
= vnode_compound_mkdir_available(dvp
); 
9069         VATTR_SET(vap
, va_type
, VDIR
); 
9073          * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will 
9074          * only get EEXIST or EISDIR for existing path components, and not that it could see
9075          * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
9076          * it will fail in a spurious manner.  Need to figure out if this is valid behavior.
9078         if ((error 
= vn_authorize_mkdir(dvp
, &nd
.ni_cnd
, vap
, ctx
, NULL
)) != 0) { 
9079                 if (error 
== EACCES 
|| error 
== EPERM
) { 
9087                          * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST 
9088                          * rather than EACCES if the target exists.
9090                         NDINIT(&nd
, LOOKUP
, OP_MKDIR
, AUDITVNPATH1
, segflg
, 
9092                         error2 
= nameiat(&nd
, fd
); 
9106          * make the directory 
9108         if ((error 
= vn_create(dvp
, &vp
, &nd
, vap
, 0, 0, NULL
, ctx
)) != 0) { 
9109                 if (error 
== EKEEPLOOKING
) { 
9111                         goto continue_lookup
; 
9117         // Make sure the name & parent pointers are hooked up 
9118         if (vp
->v_name 
== NULL
) { 
9119                 update_flags 
|= VNODE_UPDATE_NAME
; 
9121         if (vp
->v_parent 
== NULLVP
) { 
9122                 update_flags 
|= VNODE_UPDATE_PARENT
; 
9126                 vnode_update_identity(vp
, dvp
, nd
.ni_cnd
.cn_nameptr
, nd
.ni_cnd
.cn_namelen
, nd
.ni_cnd
.cn_hash
, update_flags
); 
9130         add_fsevent(FSE_CREATE_DIR
, ctx
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
); 
9135          * nameidone has to happen before we vnode_put(dvp) 
9136          * since it may need to release the fs_nodelock on the dvp 
9151  * mkdir_extended: Create a directory; with extended security (ACL). 
9153  * Parameters:    p                       Process requesting to create the directory 
9154  *                uap                     User argument descriptor (see below) 
9157  * Indirect:      uap->path               Path of directory to create 
9158  *                uap->mode               Access permissions to set 
9159  *                uap->xsecurity          ACL to set 
9161  * Returns:        0                      Success 
9166 mkdir_extended(proc_t p
, struct mkdir_extended_args 
*uap
, __unused 
int32_t *retval
) 
9169         kauth_filesec_t xsecdst
; 
9170         struct vnode_attr va
; 
9172         AUDIT_ARG(owner
, uap
->uid
, uap
->gid
); 
9175         if ((uap
->xsecurity 
!= USER_ADDR_NULL
) && 
9176             ((ciferror 
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0)) { 
9181         VATTR_SET(&va
, va_mode
, (uap
->mode 
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
); 
9182         if (xsecdst 
!= NULL
) { 
9183                 VATTR_SET(&va
, va_acl
, &xsecdst
->fsec_acl
); 
9186         ciferror 
= mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 
9188         if (xsecdst 
!= NULL
) { 
9189                 kauth_filesec_free(xsecdst
); 
9195 mkdir(proc_t p
, struct mkdir_args 
*uap
, __unused 
int32_t *retval
) 
9197         struct vnode_attr va
; 
9200         VATTR_SET(&va
, va_mode
, (uap
->mode 
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
); 
9202         return mkdir1at(vfs_context_current(), uap
->path
, &va
, AT_FDCWD
, 
9207 mkdirat(proc_t p
, struct mkdirat_args 
*uap
, __unused 
int32_t *retval
) 
9209         struct vnode_attr va
; 
9212         VATTR_SET(&va
, va_mode
, (uap
->mode 
& ACCESSPERMS
) & ~p
->p_fd
->fd_cmask
); 
9214         return mkdir1at(vfs_context_current(), uap
->path
, &va
, uap
->fd
, 
9219 rmdirat_internal(vfs_context_t ctx
, int fd
, user_addr_t dirpath
, 
9220     enum uio_seg segflg
, int unlink_flags
) 
9224         struct nameidata nd
; 
9226         char     *no_firmlink_path 
= NULL
; 
9228         int       len_no_firmlink_path 
= 0; 
9229         int has_listeners 
= 0; 
9231         int truncated_path 
= 0; 
9232         int truncated_no_firmlink_path 
= 0; 
9234         struct vnode_attr va
; 
9235 #endif /* CONFIG_FSE */ 
9236         struct vnode_attr 
*vap 
= NULL
; 
9237         int restart_count 
= 0; 
9243          * This loop exists to restart rmdir in the unlikely case that two 
9244          * processes are simultaneously trying to remove the same directory 
9245          * containing orphaned appleDouble files. 
9248                 NDINIT(&nd
, DELETE
, OP_RMDIR
, LOCKPARENT 
| AUDITVNPATH1
, 
9249                     segflg
, dirpath
, ctx
); 
9250                 nd
.ni_flag 
= NAMEI_COMPOUNDRMDIR
; 
9255                 error 
= nameiat(&nd
, fd
); 
9264                         batched 
= vnode_compound_rmdir_available(vp
); 
9266                         if (vp
->v_flag 
& VROOT
) { 
9268                                  * The root of a mounted filesystem cannot be deleted. 
9274 #if DEVELOPMENT || DEBUG 
9276                          * XXX VSWAP: Check for entitlements or special flag here 
9277                          * so we can restrict access appropriately. 
9279 #else /* DEVELOPMENT || DEBUG */ 
9281                         if (vnode_isswap(vp
) && (ctx 
!= vfs_context_kernel())) { 
9285 #endif /* DEVELOPMENT || DEBUG */ 
9288                          * Removed a check here; we used to abort if vp's vid 
9289                          * was not the same as what we'd seen the last time around. 
9290                          * I do not think that check was valid, because if we retry 
9291                          * and all dirents are gone, the directory could legitimately 
9292                          * be recycled but still be present in a situation where we would 
9293                          * have had permission to delete.  Therefore, we won't make 
9294                          * an effort to preserve that check now that we may not have a 
9299                                 error 
= vn_authorize_rmdir(dvp
, vp
, &nd
.ni_cnd
, ctx
, NULL
); 
9301                                         if (error 
== ENOENT
) { 
9302                                                 if (restart_count 
< MAX_AUTHORIZE_ENOENT_RETRIES
) { 
9313                         if (!vnode_compound_rmdir_available(dvp
)) { 
9314                                 panic("No error, but no compound rmdir?"); 
9319                 fse_info  finfo 
= {0}; 
9321                 need_event 
= need_fsevent(FSE_DELETE
, dvp
); 
9324                                 get_fse_info(vp
, &finfo
, ctx
); 
9326                                 error 
= vfs_get_notify_attributes(&va
); 
9335                 has_listeners 
= kauth_authorize_fileop_has_listeners(); 
9336                 if (need_event 
|| has_listeners
) { 
9341                         len_path 
= safe_getpath(dvp
, nd
.ni_cnd
.cn_nameptr
, path
, MAXPATHLEN
, &truncated_path
); 
9343                         if (no_firmlink_path 
== NULL
) { 
9344                                 GET_PATH(no_firmlink_path
); 
9347                         len_no_firmlink_path 
= safe_getpath_no_firmlink(dvp
, nd
.ni_cnd
.cn_nameptr
, no_firmlink_path
, MAXPATHLEN
, &truncated_no_firmlink_path
); 
9349                         if (truncated_no_firmlink_path
) { 
9350                                 finfo
.mode 
|= FSE_TRUNCATED_PATH
; 
9355                 error 
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
); 
9358                         /* Couldn't find a vnode */ 
9362                 if (error 
== EKEEPLOOKING
) { 
9363                         goto continue_lookup
; 
9364                 } else if (batched 
&& error 
== ENOENT
) { 
9365                         if (restart_count 
< MAX_AUTHORIZE_ENOENT_RETRIES
) { 
9367                                  * For compound VNOPs, the authorization callback 
9368                                  * may return ENOENT in case of racing hard link lookups 
9369                                  * redrive the lookup. 
9378                  * XXX There's no provision for passing flags 
9379                  * to VNOP_RMDIR().  So, if vn_rmdir() fails 
9380                  * because it's not empty, then we try again 
9381                  * with VNOP_REMOVE(), passing in a special 
9382                  * flag that clever file systems will know 
9385                 if (error 
== ENOTEMPTY 
&& 
9386                     (unlink_flags 
& VNODE_REMOVE_DATALESS_DIR
) != 0) { 
9388                          * If this fails, we want to keep the original 
9391                         if (vn_remove(dvp
, &vp
, &nd
, 
9392                             VNODE_REMOVE_DATALESS_DIR
, vap
, ctx
) == 0) { 
9397 #if CONFIG_APPLEDOUBLE 
9399                  * Special case to remove orphaned AppleDouble 
9400                  * files. I don't like putting this in the kernel, 
9401                  * but carbon does not like putting this in carbon either, 
9404                 if (error 
== ENOTEMPTY
) { 
9405                         int ad_error 
= rmdir_remove_orphaned_appleDouble(vp
, ctx
, &restart_flag
); 
9406                         if (ad_error 
== EBUSY
) { 
9413                          * Assuming everything went well, we will try the RMDIR again 
9416                                 error 
= vn_rmdir(dvp
, &vp
, &nd
, vap
, ctx
); 
9419 #endif /* CONFIG_APPLEDOUBLE */ 
9421                  * Call out to allow 3rd party notification of delete. 
9422                  * Ignore result of kauth_authorize_fileop call. 
9425                         if (has_listeners
) { 
9426                                 kauth_authorize_fileop(vfs_context_ucred(ctx
), 
9427                                     KAUTH_FILEOP_DELETE
, 
9432                         if (vp
->v_flag 
& VISHARDLINK
) { 
9433                                 // see the comment in unlink1() about why we update 
9434                                 // the parent of a hard link when it is removed 
9435                                 vnode_update_identity(vp
, NULL
, NULL
, 0, 0, VNODE_UPDATE_PARENT
); 
9441                                         vnode_get_fse_info_from_vap(vp
, &finfo
, vap
); 
9443                                 add_fsevent(FSE_DELETE
, ctx
, 
9444                                     FSE_ARG_STRING
, len_no_firmlink_path
, no_firmlink_path
, 
9445                                     FSE_ARG_FINFO
, &finfo
, 
9457                 if (no_firmlink_path 
!= NULL
) { 
9458                         RELEASE_PATH(no_firmlink_path
); 
9459                         no_firmlink_path 
= NULL
; 
9463                  * nameidone has to happen before we vnode_put(dvp) 
9464                  * since it may need to release the fs_nodelock on the dvp 
9473                 if (restart_flag 
== 0) { 
9474                         wakeup_one((caddr_t
)vp
); 
9477                 tsleep(vp
, PVFS
, "rm AD", 1); 
9478         } while (restart_flag 
!= 0); 
9484  * Remove a directory file. 
9488 rmdir(__unused proc_t p
, struct rmdir_args 
*uap
, __unused 
int32_t *retval
) 
9490         return rmdirat_internal(vfs_context_current(), AT_FDCWD
, 
9491                    CAST_USER_ADDR_T(uap
->path
), UIO_USERSPACE
, 0); 
/*
 * Get direntry length padded to 8 byte alignment.
 *
 * `namlen` is the name length as stored in d_namlen.
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Get dirent length padded to 4 byte alignment.
 *
 * `namelen` is the name length as stored in d_namlen.  The argument is
 * parenthesized so that an expression argument (for example `x & 0xff` or
 * `a ? b : c`) is evaluated as a unit before the `+ 1` is applied.
 */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + ((namelen) + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/*
 * Get the end of this dirent: the address of the last byte covered by the
 * record's d_reclen.
 */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
/*
 * vnode_readdir64: fill `uio` with struct direntry records read from the
 * directory `vp`.
 *
 * When the mount's vfc_vfsflags has VFC_VFSREADDIR_EXTENDED set and
 * MNTK_DENY_READDIREXT is clear in mnt_kern_flag, the request is handed
 * straight to VNOP_READDIR() with the caller's uio and flags.
 *
 * Otherwise the entries are read as struct dirent into a kernel bounce
 * buffer (VNOP_READDIR() with flags 0), each one is sanity-checked against
 * the number of bytes actually read, converted into a struct direntry with
 * d_seekoff set to 0, and moved to `uio` with uiomove().
 *
 * eofflag and numdirent are passed through to VNOP_READDIR() untouched.
 */
9507 vnode_readdir64(struct vnode 
*vp
, struct uio 
*uio
, int flags
, int *eofflag
, 
9508     int *numdirent
, vfs_context_t ctxp
) 
9510         /* Check if fs natively supports VNODE_READDIR_EXTENDED */ 
9511         if ((vp
->v_mount
->mnt_vtable
->vfc_vfsflags 
& VFC_VFSREADDIR_EXTENDED
) && 
9512             ((vp
->v_mount
->mnt_kern_flag 
& MNTK_DENY_READDIREXT
) == 0)) { 
9513                 return VNOP_READDIR(vp
, uio
, flags
, eofflag
, numdirent
, ctxp
); 
9518                 struct direntry 
*entry64
; 
9524                  * We're here because the underlying file system does not 
9525                  * support direnties or we mounted denying support so we must 
9526                  * fall back to dirents and convert them to direntries. 
9528                  * Our kernel buffer needs to be smaller since re-packing will 
9529                  * expand each dirent.  The worse case (when the name length 
9530                  * is 3 or less) corresponds to a struct direntry size of 32 
9531                  * bytes (8-byte aligned) and a struct dirent size of 12 bytes 
9532                  * (4-byte aligned).  So having a buffer that is 3/8 the size 
9533                  * will prevent us from reading more than we can pack. 
9535                  * Since this buffer is wired memory, we will limit the 
9536                  * buffer size to a maximum of 32K. We would really like to 
9537                  * use 32K in the MIN(), but we use magic number 87371 to 
9538                  * prevent uio_resid() * 3 / 8 from overflowing. 
/* Bounce buffer: 3/8 of the caller's residual, with the residual capped at 87371. */
9540                 bufsize 
= 3 * MIN((user_size_t
)uio_resid(uio
), 87371u) / 8; 
9541                 bufptr 
= kheap_alloc(KHEAP_DATA_BUFFERS
, bufsize
, Z_WAITOK
); 
9542                 if (bufptr 
== NULL
) { 
/* Kernel-space uio over the bounce buffer, starting at the caller's offset. */
9546                 auio 
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
); 
9547                 uio_addiov(auio
, (uintptr_t)bufptr
, bufsize
); 
9548                 auio
->uio_offset 
= uio
->uio_offset
; 
/* Note that the caller's flags are not forwarded here; 0 is passed instead. */
9550                 error 
= VNOP_READDIR(vp
, auio
, 0, eofflag
, numdirent
, ctxp
); 
9552                 dep 
= (struct dirent 
*)bufptr
; 
9553                 bytesread 
= bufsize 
- uio_resid(auio
); 
/*
 * Scratch record reused for every converted entry.
 * NOTE(review): no NULL check on this allocation is visible in this
 * excerpt -- confirm against the full source.
 */
9555                 entry64 
= kheap_alloc(KHEAP_TEMP
, sizeof(struct direntry
), Z_WAITOK
); 
9557                  * Convert all the entries and copy them out to user's buffer. 
9559                 while (error 
== 0 && (char *)dep 
< ((char *)bufptr 
+ bytesread
)) { 
9560                         /* First check that the dirent struct up to d_name is within the buffer */ 
9561                         if ((char*)dep 
+ offsetof(struct dirent
, d_name
) > ((char *)bufptr 
+ bytesread
) || 
9562                             /* Check that the length of the entire dirent is within the buffer */ 
9563                             DIRENT_END(dep
) > ((char *)bufptr 
+ bytesread
) || 
9564                             /* Check that the actual length including the name doesn't exceed d_reclen */ 
9565                             DIRENT_LEN(dep
->d_namlen
) > dep
->d_reclen
) { 
9566                                 printf("%s: %s: Bad dirent recived from directory %s\n", __func__
, 
9567                                     vp
->v_mount
->mnt_vfsstat
.f_mntonname
, 
9568                                     vp
->v_name 
? vp
->v_name 
: "<unknown>"); 
/* Size of the converted record; only this many bytes of entry64 are cleared and copied out. */
9573                         size_t  enbufsize 
= DIRENT64_LEN(dep
->d_namlen
); 
9575                         bzero(entry64
, enbufsize
); 
9576                         /* Convert a dirent to a dirent64. */ 
9577                         entry64
->d_ino 
= dep
->d_ino
; 
9578                         entry64
->d_seekoff 
= 0; 
9579                         entry64
->d_reclen 
= (uint16_t)enbufsize
; 
9580                         entry64
->d_namlen 
= dep
->d_namlen
; 
9581                         entry64
->d_type 
= dep
->d_type
; 
9582                         bcopy(dep
->d_name
, entry64
->d_name
, dep
->d_namlen 
+ 1); 
9584                         /* Move to next entry. */ 
9585                         dep 
= (struct dirent 
*)((char *)dep 
+ dep
->d_reclen
); 
9587                         /* Copy entry64 to user's buffer. */ 
9588                         error 
= uiomove((caddr_t
)entry64
, entry64
->d_reclen
, uio
); 
9591                 /* Update the real offset using the offset we got from VNOP_READDIR. */ 
9593                         uio
->uio_offset 
= auio
->uio_offset
; 
/* Release both kernel allocations, with the same heaps and sizes used to allocate them. */
9596                 kheap_free(KHEAP_DATA_BUFFERS
, bufptr
, bufsize
); 
9597                 kheap_free(KHEAP_TEMP
, entry64
, sizeof(struct direntry
)); 
/*
 * getdirentries_common: shared worker called by getdirentries() and
 * getdirentries64().
 *
 * Parameters:  fd         descriptor resolved with fp_getfvp()
 *              bufp       user address of the destination buffer
 *              bufsize    size of that buffer; values above
 *                         GETDIRENTRIES_MAXBUFSIZE are clamped to it
 *              bytesread  out: bufsize minus the residual left in the uio
 *              offset     out parameter (the store is not visible in this
 *                         excerpt)
 *              eofflag    out: handed to the readdir call
 *              flags      VNODE_READDIR_EXTENDED selects vnode_readdir64();
 *                         otherwise VNOP_READDIR() is called directly
 *
 * The descriptor's FREAD flag is tested, the MAC change-offset and readdir
 * hooks are consulted, and the vnode type is compared against VDIR.  The
 * read starts at the descriptor's fg_offset, and fg_offset is updated from
 * the uio afterwards.
 */
/* Upper bound applied to the caller-supplied buffer size. */
9602 #define GETDIRENTRIES_MAXBUFSIZE        (128 * 1024 * 1024U) 
9605  * Read a block of directory entries in a file system independent format. 
9608 getdirentries_common(int fd
, user_addr_t bufp
, user_size_t bufsize
, ssize_t 
*bytesread
, 
9609     off_t 
*offset
, int *eofflag
, int flags
) 
9612         struct vfs_context context 
= *vfs_context_current();    /* local copy */ 
9613         struct fileproc 
*fp
; 
9615         int spacetype 
= proc_is64bit(vfs_context_proc(&context
)) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
9617         int error
, numdirent
; 
9618         char uio_buf
[UIO_SIZEOF(1)]; 
9620         error 
= fp_getfvp(vfs_context_proc(&context
), fd
, &fp
, &vp
); 
/* Descriptor lacks FREAD: record the vnode path for auditing. */
9624         if ((fp
->fp_glob
->fg_flag 
& FREAD
) == 0) { 
9625                 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
); 
/* Clamp oversized requests instead of rejecting them. */
9630         if (bufsize 
> GETDIRENTRIES_MAXBUFSIZE
) { 
9631                 bufsize 
= GETDIRENTRIES_MAXBUFSIZE
; 
9635         error 
= mac_file_check_change_offset(vfs_context_ucred(&context
), fp
->fp_glob
); 
9640         if ((error 
= vnode_getwithref(vp
))) { 
9643         AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
); 
/* Non-directory vnode: the reference taken by vnode_getwithref() is dropped. */
9646         if (vp
->v_type 
!= VDIR
) { 
9647                 (void)vnode_put(vp
); 
9653         error 
= mac_vnode_check_readdir(&context
, vp
); 
9655                 (void)vnode_put(vp
); 
/* Build a single-iovec uio over the user buffer, starting at the descriptor's offset. */
9660         loff 
= fp
->fp_glob
->fg_offset
; 
9661         auio 
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
)); 
9662         uio_addiov(auio
, bufp
, bufsize
); 
9664         if (flags 
& VNODE_READDIR_EXTENDED
) { 
9665                 error 
= vnode_readdir64(vp
, auio
, flags
, eofflag
, &numdirent
, &context
); 
9666                 fp
->fp_glob
->fg_offset 
= uio_offset(auio
); 
9668                 error 
= VNOP_READDIR(vp
, auio
, 0, eofflag
, &numdirent
, &context
); 
9669                 fp
->fp_glob
->fg_offset 
= uio_offset(auio
); 
9672                 (void)vnode_put(vp
); 
/*
 * Residual equals the full buffer size, i.e. nothing was transferred.  On a
 * union mount, lookup_traverse_union() is tried and, on success, the
 * descriptor is re-pointed at the returned vnode with its offset reset to 0.
 */
9676         if ((user_ssize_t
)bufsize 
== uio_resid(auio
)) { 
9677                 if ((vp
->v_mount
->mnt_flag 
& MNT_UNION
)) { 
9678                         struct vnode 
*tvp 
= vp
; 
9679                         if (lookup_traverse_union(tvp
, &vp
, &context
) == 0) { 
9681                                 fp
->fp_glob
->fg_data 
= (caddr_t
) vp
; 
9682                                 fp
->fp_glob
->fg_offset 
= 0; 
9696         *bytesread 
= bufsize 
- uio_resid(auio
); 
9704 getdirentries(__unused 
struct proc 
*p
, struct getdirentries_args 
*uap
, int32_t *retval
) 
9710         AUDIT_ARG(fd
, uap
->fd
); 
9711         error 
= getdirentries_common(uap
->fd
, uap
->buf
, uap
->count
, 
9712             &bytesread
, &offset
, &eofflag
, 0); 
9715                 if (proc_is64bit(p
)) { 
9716                         user64_long_t base 
= (user64_long_t
)offset
; 
9717                         error 
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user64_long_t
)); 
9719                         user32_long_t base 
= (user32_long_t
)offset
; 
9720                         error 
= copyout((caddr_t
)&base
, uap
->basep
, sizeof(user32_long_t
)); 
9722                 *retval 
= (int)bytesread
; 
9728 getdirentries64(__unused 
struct proc 
*p
, struct getdirentries64_args 
*uap
, user_ssize_t 
*retval
) 
9733         user_size_t bufsize
; 
9735         AUDIT_ARG(fd
, uap
->fd
); 
9738          * If the buffer is at least GETDIRENTRIES64_EXTENDED_BUFSIZE large, 
9739          * then the kernel carves out the last 4 bytes to return extended 
9740          * information to userspace (namely whether we reached EOF with this call). 
9742         if (uap
->bufsize 
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) { 
9743                 bufsize 
= uap
->bufsize 
- sizeof(getdirentries64_flags_t
); 
9745                 bufsize 
= uap
->bufsize
; 
9748         error 
= getdirentries_common(uap
->fd
, uap
->buf
, bufsize
, 
9749             &bytesread
, &offset
, &eofflag
, VNODE_READDIR_EXTENDED
); 
9752                 *retval 
= bytesread
; 
9753                 error 
= copyout((caddr_t
)&offset
, uap
->position
, sizeof(off_t
)); 
9755                 if (error 
== 0 && uap
->bufsize 
>= GETDIRENTRIES64_EXTENDED_BUFSIZE
) { 
9756                         getdirentries64_flags_t flags 
= 0; 
9758                                 flags 
|= GETDIRENTRIES64_EOF
; 
9760                         error 
= copyout(&flags
, (user_addr_t
)uap
->buf 
+ bufsize
, 
/*
 * umask1: worker for umask() and umask_extended().
 *
 * Stores the previous fd_cmask of the file descriptor table in *retval and
 * replaces it with `newmask` restricted to the ALLPERMS bits.  The `fsec`
 * argument is marked __unused.
 *
 * NOTE(review): the assignment of `fdp` and the return statement are not
 * visible in this excerpt -- confirm against the full source.
 */
9769  * Set the mode mask for creation of filesystem nodes. 
9770  * XXX implement xsecurity 
9772 #define UMASK_NOXSECURITY        (void *)1      /* leave existing xsecurity alone */ 
9774 umask1(proc_t p
, int newmask
, __unused kauth_filesec_t fsec
, int32_t *retval
) 
9776         struct filedesc 
*fdp
; 
9778         AUDIT_ARG(mask
, newmask
); 
/* Hand back the old mask first, then install the new one. */
9781         *retval 
= fdp
->fd_cmask
; 
9782         fdp
->fd_cmask 
= newmask 
& ALLPERMS
; 
9788  * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL). 
9790  * Parameters:    p                       Process requesting to set the umask 
9791  *                uap                     User argument descriptor (see below) 
9792  *                retval                  umask of the process (parameter p) 
9794  * Indirect:      uap->newmask            umask to set 
9795  *                uap->xsecurity          ACL to set 
9797  * Returns:        0                      Success 
9802 umask_extended(proc_t p
, struct umask_extended_args 
*uap
, int32_t *retval
) 
9805         kauth_filesec_t xsecdst
; 
9807         xsecdst 
= KAUTH_FILESEC_NONE
; 
9808         if (uap
->xsecurity 
!= USER_ADDR_NULL
) { 
9809                 if ((ciferror 
= kauth_copyinfilesec(uap
->xsecurity
, &xsecdst
)) != 0) { 
9813                 xsecdst 
= KAUTH_FILESEC_NONE
; 
9816         ciferror 
= umask1(p
, uap
->newmask
, xsecdst
, retval
); 
9818         if (xsecdst 
!= KAUTH_FILESEC_NONE
) { 
9819                 kauth_filesec_free(xsecdst
); 
9825 umask(proc_t p
, struct umask_args 
*uap
, int32_t *retval
) 
9827         return umask1(p
, uap
->newmask
, UMASK_NOXSECURITY
, retval
); 
/*
 * revoke: system call that invalidates outstanding references to a device
 * node named by a user-space path.
 *
 * Visible steps: the path is looked up (following symlinks); the vnode is
 * tested for being a character or block device, and for being a block
 * device that is mounted on; the MAC revoke hook is consulted; the owner
 * uid is fetched and compared with the caller's uid, falling back to a
 * superuser check; finally VNOP_REVOKE(REVOKEALL) is issued when the vnode
 * has a non-zero use count or is aliased.
 *
 * NOTE(review): the error values chosen on each rejection path are not
 * visible in this excerpt.
 */
9831  * Void all references to file by ripping underlying filesystem 
9836 revoke(proc_t p
, struct revoke_args 
*uap
, __unused 
int32_t *retval
) 
9839         struct vnode_attr va
; 
9840         vfs_context_t ctx 
= vfs_context_current(); 
9842         struct nameidata nd
; 
9844         NDINIT(&nd
, LOOKUP
, OP_REVOKE
, FOLLOW 
| AUDITVNPATH1
, UIO_USERSPACE
, 
/* Target is neither a character nor a block device. */
9854         if (!(vnode_ischr(vp
) || vnode_isblk(vp
))) { 
/* Target is a block device that currently has a filesystem mounted on it. */
9859         if (vnode_isblk(vp
) && vnode_ismountedon(vp
)) { 
9865         error 
= mac_vnode_check_revoke(ctx
, vp
); 
/* Only the owner uid is requested from the filesystem. */
9872         VATTR_WANTED(&va
, va_uid
); 
9873         if ((error 
= vnode_getattr(vp
, &va
, ctx
))) { 
/* A caller who is not the owner must pass the superuser check. */
9876         if (kauth_cred_getuid(vfs_context_ucred(ctx
)) != va
.va_uid 
&& 
9877             (error 
= suser(vfs_context_ucred(ctx
), &p
->p_acflag
))) { 
/* The return value of VNOP_REVOKE() is not captured here. */
9880         if (vp
->v_usecount 
> 0 || (vnode_isaliased(vp
))) { 
9881                 VNOP_REVOKE(vp
, REVOKEALL
, ctx
); 
9890  *  HFS/HFS Plus SPECIFIC SYSTEM CALLS 
9891  *  The following system calls are designed to support features 
9892  *  which are specific to the HFS & HFS Plus volume formats 
/*
 * getdirentriesattr: system call that enumerates a directory and returns
 * the requested attributes for each entry.
 *
 * Inputs copied in from user space: the attribute list (uap->alist) and the
 * entry count (uap->count).  Outputs copied back: the entry count
 * (uap->count), the new directory state (uap->newstate) and the starting
 * offset (uap->basep).  *retval is set to the EOF flag.
 *
 * The descriptor's FREAD flag is tested, the MAC change-offset and readdir
 * hooks are consulted, and the vnode type is compared against VDIR.
 * Authorization requires KAUTH_VNODE_LIST_DIRECTORY, plus
 * KAUTH_VNODE_SEARCH when anything other than ATTR_CMN_NAME is requested.
 * The enumeration itself is done by VNOP_READDIRATTR().
 */
9897  * Obtain attribute information on objects in a directory while enumerating 
9902 getdirentriesattr(proc_t p
, struct getdirentriesattr_args 
*uap
, int32_t *retval
) 
9905         struct fileproc 
*fp
; 
9907         int spacetype 
= proc_is64bit(p
) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
9908         uint32_t count 
= 0, savecount 
= 0; 
9909         uint32_t newstate 
= 0; 
9912         struct attrlist attributelist
; 
9913         vfs_context_t ctx 
= vfs_context_current(); 
9915         char uio_buf
[UIO_SIZEOF(1)]; 
9916         kauth_action_t action
; 
9920         /* Get the attributes into kernel space */ 
9921         if ((error 
= copyin(uap
->alist
, (caddr_t
)&attributelist
, sizeof(attributelist
)))) { 
9924         if ((error 
= copyin(uap
->count
, (caddr_t
)&count
, sizeof(count
)))) { 
9928         if ((error 
= fp_getfvp(p
, fd
, &fp
, &vp
))) { 
/* Descriptor lacks FREAD: record the vnode path for auditing. */
9931         if ((fp
->fp_glob
->fg_flag 
& FREAD
) == 0) { 
9932                 AUDIT_ARG(vnpath_withref
, vp
, ARG_VNODE1
); 
9939         error 
= mac_file_check_change_offset(vfs_context_ucred(ctx
), 
9947         if ((error 
= vnode_getwithref(vp
))) { 
9951         AUDIT_ARG(vnpath
, vp
, ARG_VNODE1
); 
/* Non-directory vnode: the reference taken by vnode_getwithref() is dropped. */
9954         if (vp
->v_type 
!= VDIR
) { 
9955                 (void)vnode_put(vp
); 
9961         error 
= mac_vnode_check_readdir(ctx
, vp
); 
9963                 (void)vnode_put(vp
); 
9968         /* set up the uio structure which will contain the users return buffer */ 
9969         loff 
= fp
->fp_glob
->fg_offset
; 
9970         auio 
= uio_createwithbuffer(1, loff
, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
)); 
9971         uio_addiov(auio
, uap
->buffer
, uap
->buffersize
); 
9974          * If the only item requested is file names, we can let that past with 
9975          * just LIST_DIRECTORY.  If they want any other attributes, that means 
9976          * they need SEARCH as well. 
9978         action 
= KAUTH_VNODE_LIST_DIRECTORY
; 
9979         if ((attributelist
.commonattr 
& ~ATTR_CMN_NAME
) || 
9980             attributelist
.fileattr 
|| attributelist
.dirattr
) { 
9981                 action 
|= KAUTH_VNODE_SEARCH
; 
9984         if ((error 
= vnode_authorize(vp
, NULL
, action
, ctx
)) == 0) { 
9985                 /* Believe it or not, uap->options only has 32-bits of valid 
9986                  * info, so truncate before extending again */ 
/* `count` is passed both as the requested maximum and as the out-parameter for the actual number. */
9988                 error 
= VNOP_READDIRATTR(vp
, &attributelist
, auio
, count
, 
9989                     (uint32_t)uap
->options
, &newstate
, &eofflag
, &count
, ctx
); 
9993                 (void) vnode_put(vp
); 
9998          * If we've got the last entry of a directory in a union mount 
9999          * then reset the eofflag and pretend there's still more to come. 
10000          * The next call will again set eofflag and the buffer will be empty, 
10001          * so traverse to the underlying directory and do the directory 
10004         if (eofflag 
&& vp
->v_mount
->mnt_flag 
& MNT_UNION
) { 
10005                 if (uio_resid(auio
) < (user_ssize_t
) uap
->buffersize
) { // Got some entries 
10007                 } else {                                                // Empty buffer 
10008                         struct vnode 
*tvp 
= vp
; 
/* Re-point the descriptor at the vnode returned by the traversal and release the old one. */
10009                         if (lookup_traverse_union(tvp
, &vp
, ctx
) == 0) { 
10010                                 vnode_ref_ext(vp
, fp
->fp_glob
->fg_flag 
& O_EVTONLY
, 0); 
10011                                 fp
->fp_glob
->fg_data 
= (caddr_t
) vp
; 
10012                                 fp
->fp_glob
->fg_offset 
= 0; // reset index for new dir 
10014                                 vnode_rele_internal(tvp
, fp
->fp_glob
->fg_flag 
& O_EVTONLY
, 0, 0); 
10022         (void)vnode_put(vp
); 
10027         fp
->fp_glob
->fg_offset 
= uio_offset(auio
); /* should be multiple of dirent, not variable */ 
/* Copy the three result words back to the caller. */
10029         if ((error 
= copyout((caddr_t
) &count
, uap
->count
, sizeof(count
)))) { 
10032         if ((error 
= copyout((caddr_t
) &newstate
, uap
->newstate
, sizeof(newstate
)))) { 
10035         if ((error 
= copyout((caddr_t
) &loff
, uap
->basep
, sizeof(loff
)))) { 
10039         *retval 
= eofflag
;  /* similar to getdirentries */ 
10043         return error
; /* return error earlier, an retval of 0 or 1 now */ 
10044 } /* end of getdirentriesattr system call */ 
/*
 * exchangedata: system call that swaps the contents of the two files named
 * by uap->path1 and uap->path2.
 *
 * Visible steps: both paths are looked up (symlinks are followed unless
 * FSOPT_NOFOLLOW is set in uap->options); the two vnodes are compared for
 * mount equality and tested for being regular files; the MAC hook is
 * consulted; read and write data access is authorized on both; paths and
 * fsevent info are gathered when an FSE_EXCHANGE event or a fileop listener
 * needs them; VNOP_EXCHANGE() performs the swap.  Afterwards the fileop
 * listeners are called, the cached v_name (and, when they differ, v_parent)
 * of the two vnodes are swapped, and an FSE_EXCHANGE event is posted.  Both
 * path buffers are released on the way out.
 *
 * NOTE(review): the error values chosen on each rejection path, and the
 * initial value of nameiflags, are not visible in this excerpt.
 */
10047  * Exchange data between two files 
10052 exchangedata(__unused proc_t p
, struct exchangedata_args 
*uap
, __unused 
int32_t *retval
) 
10054         struct nameidata fnd
, snd
; 
10055         vfs_context_t ctx 
= vfs_context_current(); 
10059         u_int32_t nameiflags
; 
10060         char *fpath 
= NULL
; 
10061         char *spath 
= NULL
; 
10062         int   flen 
= 0, slen 
= 0; 
10063         int from_truncated 
= 0, to_truncated 
= 0; 
10065         fse_info f_finfo
, s_finfo
; 
10069         if ((uap
->options 
& FSOPT_NOFOLLOW
) == 0) { 
10070                 nameiflags 
|= FOLLOW
; 
/* Look up the first path. */
10073         NDINIT(&fnd
, LOOKUP
, OP_EXCHANGEDATA
, nameiflags 
| AUDITVNPATH1
, 
10074             UIO_USERSPACE
, uap
->path1
, ctx
); 
10076         error 
= namei(&fnd
); 
/* Look up the second path; CN_NBMOUNTLOOK is added for this lookup only. */
10084         NDINIT(&snd
, LOOKUP
, OP_EXCHANGEDATA
, CN_NBMOUNTLOOK 
| nameiflags 
| AUDITVNPATH2
, 
10085             UIO_USERSPACE
, uap
->path2
, ctx
); 
10087         error 
= namei(&snd
); 
10096          * if the files are the same, return an inval error 
10104          * if the files are on different volumes, return an error 
10106         if (svp
->v_mount 
!= fvp
->v_mount
) { 
10111         /* If they're not files, return an error */ 
10112         if ((vnode_isreg(fvp
) == 0) || (vnode_isreg(svp
) == 0)) { 
10118         error 
= mac_vnode_check_exchangedata(ctx
, 
/* Both files must be readable and writable by the caller. */
10124         if (((error 
= vnode_authorize(fvp
, NULL
, KAUTH_VNODE_READ_DATA 
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0) || 
10125             ((error 
= vnode_authorize(svp
, NULL
, KAUTH_VNODE_READ_DATA 
| KAUTH_VNODE_WRITE_DATA
, ctx
)) != 0)) { 
10131                 need_fsevent(FSE_EXCHANGE
, fvp
) || 
10133                 kauth_authorize_fileop_has_listeners()) { 
10137                 flen 
= safe_getpath(fvp
, NULL
, fpath
, MAXPATHLEN
, &from_truncated
); 
10138                 slen 
= safe_getpath(svp
, NULL
, spath
, MAXPATHLEN
, &to_truncated
); 
10141                 get_fse_info(fvp
, &f_finfo
, ctx
); 
10142                 get_fse_info(svp
, &s_finfo
, ctx
); 
10143                 if (from_truncated 
|| to_truncated
) { 
10144                         // set it here since only the f_finfo gets reported up to user space 
10145                         f_finfo
.mode 
|= FSE_TRUNCATED_PATH
; 
10149         /* Ok, make the call */ 
10150         error 
= VNOP_EXCHANGE(fvp
, svp
, 0, ctx
); 
10153                 const char *tmpname
; 
10155                 if (fpath 
!= NULL 
&& spath 
!= NULL
) { 
10156                         /* call out to allow 3rd party notification of exchangedata. 
10157                          * Ignore result of kauth_authorize_fileop call. 
10159                         kauth_authorize_fileop(vfs_context_ucred(ctx
), KAUTH_FILEOP_EXCHANGE
, 
10160                             (uintptr_t)fpath
, (uintptr_t)spath
); 
10164                 tmpname     
= fvp
->v_name
; 
10165                 fvp
->v_name 
= svp
->v_name
; 
10166                 svp
->v_name 
= tmpname
; 
10168                 if (fvp
->v_parent 
!= svp
->v_parent
) { 
10171                         tmp           
= fvp
->v_parent
; 
10172                         fvp
->v_parent 
= svp
->v_parent
; 
10173                         svp
->v_parent 
= tmp
; 
10175                 name_cache_unlock(); 
10178                 if (fpath 
!= NULL 
&& spath 
!= NULL
) { 
10179                         add_fsevent(FSE_EXCHANGE
, ctx
, 
10180                             FSE_ARG_STRING
, flen
, fpath
, 
10181                             FSE_ARG_FINFO
, &f_finfo
, 
10182                             FSE_ARG_STRING
, slen
, spath
, 
10183                             FSE_ARG_FINFO
, &s_finfo
, 
10190         if (fpath 
!= NULL
) { 
10191                 RELEASE_PATH(fpath
); 
10193         if (spath 
!= NULL
) { 
10194                 RELEASE_PATH(spath
); 
10203  * Return (in MB) the amount of freespace on the given vnode's volume. 
10205 uint32_t freespace_mb(vnode_t vp
); 
10208 freespace_mb(vnode_t vp
) 
10210         vfs_update_vfsstat(vp
->v_mount
, vfs_context_current(), VFS_USER_EVENT
); 
10211         return (uint32_t)(((uint64_t)vp
->v_mount
->mnt_vfsstat
.f_bavail 
* 
10212                vp
->v_mount
->mnt_vfsstat
.f_bsize
) >> 20); 
/*
 * searchfs: system call that asks a filesystem to search a volume for
 * objects matching caller-supplied attribute criteria.
 *
 * Visible steps when CONFIG_SEARCHFS is set:
 *  - the fssearchblock is copied in, either directly in its 64-bit layout
 *    or from the 32-bit layout and widened field by field;
 *  - both search-parameter sizes are compared with SEARCHFS_MAX_SEARCHPARMS;
 *  - one allocation holds searchparams1, searchparams2, the return attrlist
 *    and the searchstate, each filled in by copyin();
 *  - union-mount continuation state kept in the searchstate is folded into
 *    uap->options;
 *  - when ATTR_CMN_NAME is searched for, the attrreference_t located after
 *    the leading length word of searchparams1 is bounds-checked;
 *  - the path is looked up, the volume root is obtained with VFS_ROOT(),
 *    and union layers are descended through mnt_vnodecovered;
 *  - the MAC hook is consulted and a maxmatches of 0 is special-cased;
 *  - VNOP_SEARCHFS() does the search; the searchstate and the match count
 *    are copied back out and the allocation is freed.
 *
 * NOTE(review): only searchparams1 has a visible attrreference_t check, and
 * no visible check ensures sizeofsearchparams1 is large enough to contain
 * the length word plus the attrreference_t -- confirm against the full
 * source.
 */
10215 #if CONFIG_SEARCHFS 
10220 searchfs(proc_t p
, struct searchfs_args 
*uap
, __unused 
int32_t *retval
) 
10225         struct nameidata nd
; 
10226         struct user64_fssearchblock searchblock
; 
10227         struct searchstate 
*state
; 
10228         struct attrlist 
*returnattrs
; 
10229         struct timeval timelimit
; 
10230         void *searchparams1
, *searchparams2
; 
10232         int spacetype 
= proc_is64bit(p
) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
10233         uint32_t nummatches
; 
10235         uint32_t nameiflags
; 
10236         vfs_context_t ctx 
= vfs_context_current(); 
10237         char uio_buf
[UIO_SIZEOF(1)]; 
10239         /* Start by copying in fsearchblock parameter list */ 
10240         if (IS_64BIT_PROCESS(p
)) { 
10241                 error 
= copyin(uap
->searchblock
, (caddr_t
) &searchblock
, sizeof(searchblock
)); 
10242                 timelimit
.tv_sec 
= searchblock
.timelimit
.tv_sec
; 
10243                 timelimit
.tv_usec 
= searchblock
.timelimit
.tv_usec
; 
10245                 struct user32_fssearchblock tmp_searchblock
; 
10247                 error 
= copyin(uap
->searchblock
, (caddr_t
) &tmp_searchblock
, sizeof(tmp_searchblock
)); 
10248                 // munge into 64-bit version 
10249                 searchblock
.returnattrs 
= CAST_USER_ADDR_T(tmp_searchblock
.returnattrs
); 
10250                 searchblock
.returnbuffer 
= CAST_USER_ADDR_T(tmp_searchblock
.returnbuffer
); 
10251                 searchblock
.returnbuffersize 
= tmp_searchblock
.returnbuffersize
; 
10252                 searchblock
.maxmatches 
= tmp_searchblock
.maxmatches
; 
10254                  * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary 
10255                  * from a 32 bit long, and tv_usec is already a signed 32 bit int. 
10257                 timelimit
.tv_sec 
= (__darwin_time_t
) tmp_searchblock
.timelimit
.tv_sec
; 
10258                 timelimit
.tv_usec 
= (__darwin_useconds_t
) tmp_searchblock
.timelimit
.tv_usec
; 
10259                 searchblock
.searchparams1 
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams1
); 
10260                 searchblock
.sizeofsearchparams1 
= tmp_searchblock
.sizeofsearchparams1
; 
10261                 searchblock
.searchparams2 
= CAST_USER_ADDR_T(tmp_searchblock
.searchparams2
); 
10262                 searchblock
.sizeofsearchparams2 
= tmp_searchblock
.sizeofsearchparams2
; 
10263                 searchblock
.searchattrs 
= tmp_searchblock
.searchattrs
; 
10269         /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2. 
10271         if (searchblock
.sizeofsearchparams1 
> SEARCHFS_MAX_SEARCHPARMS 
|| 
10272             searchblock
.sizeofsearchparams2 
> SEARCHFS_MAX_SEARCHPARMS
) { 
10276         /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */ 
10277         /* It all has to do into local memory and it's not that big so we might as well  put it all together. */ 
10278         /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/ 
10281         /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate        */ 
10282         /*       due to the changes in rdar://problem/12438273.  That way if a 3rd party file system          */ 
10283         /*       assumes the size is still 556 bytes it will continue to work                                 */ 
10285         mallocsize 
= searchblock
.sizeofsearchparams1 
+ searchblock
.sizeofsearchparams2 
+ 
10286             sizeof(struct attrlist
) + sizeof(struct searchstate
) + (2 * sizeof(uint32_t)); 
10288         searchparams1 
= kheap_alloc(KHEAP_DATA_BUFFERS
, mallocsize
, Z_WAITOK
); 
10290         /* Now set up the various pointers to the correct place in our newly allocated memory */ 
/* Layout of the allocation: searchparams1 | searchparams2 | attrlist | searchstate. */
10292         searchparams2 
= (void *) (((caddr_t
) searchparams1
) + searchblock
.sizeofsearchparams1
); 
10293         returnattrs 
= (struct attrlist 
*) (((caddr_t
) searchparams2
) + searchblock
.sizeofsearchparams2
); 
10294         state 
= (struct searchstate 
*) (((caddr_t
) returnattrs
) + sizeof(struct attrlist
)); 
10296         /* Now copy in the stuff given our local variables. */ 
10298         if ((error 
= copyin(searchblock
.searchparams1
, searchparams1
, searchblock
.sizeofsearchparams1
))) { 
10302         if ((error 
= copyin(searchblock
.searchparams2
, searchparams2
, searchblock
.sizeofsearchparams2
))) { 
10306         if ((error 
= copyin(searchblock
.returnattrs
, (caddr_t
) returnattrs
, sizeof(struct attrlist
)))) { 
10310         if ((error 
= copyin(uap
->state
, (caddr_t
) state
, sizeof(struct searchstate
)))) { 
10315          * When searching a union mount, need to set the 
10316          * start flag at the first call on each layer to 
10317          * reset state for the new volume. 
10319         if (uap
->options 
& SRCHFS_START
) { 
10320                 state
->ss_union_layer 
= 0; 
10322                 uap
->options 
|= state
->ss_union_flags
; 
10324         state
->ss_union_flags 
= 0; 
10327          * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter, 
10328          * which is passed in with an attrreference_t, we need to inspect the buffer manually here. 
10329          * The KPI does not provide us the ability to pass in the length of the buffers searchparams1 
10330          * and searchparams2. To obviate the need for all searchfs-supporting filesystems to 
10331          * validate the user-supplied data offset of the attrreference_t, we'll do it here. 
10334         if (searchblock
.searchattrs
.commonattr 
& ATTR_CMN_NAME
) { 
10335                 attrreference_t
* string_ref
; 
10336                 u_int32_t
* start_length
; 
10337                 user64_size_t param_length
; 
10339                 /* validate searchparams1 */ 
10340                 param_length 
= searchblock
.sizeofsearchparams1
; 
10341                 /* skip the word that specifies length of the buffer */ 
10342                 start_length 
= (u_int32_t
*) searchparams1
; 
10343                 start_length 
= start_length 
+ 1; 
10344                 string_ref 
= (attrreference_t
*) start_length
; 
10346                 /* ensure no negative offsets or too big offsets */ 
10347                 if (string_ref
->attr_dataoffset 
< 0) { 
10351                 if (string_ref
->attr_length 
> MAXPATHLEN
) { 
10356                 /* Check for pointer overflow in the string ref */ 
10357                 if (((char*) string_ref 
+ string_ref
->attr_dataoffset
) < (char*) string_ref
) { 
/* The referenced data must start inside searchparams1 ... */
10362                 if (((char*) string_ref 
+ string_ref
->attr_dataoffset
) > ((char*)searchparams1 
+ param_length
)) { 
/* ... and must also end inside it. */
10366                 if (((char*)string_ref 
+ string_ref
->attr_dataoffset 
+ string_ref
->attr_length
) > ((char*)searchparams1 
+ param_length
)) { 
10372         /* set up the uio structure which will contain the users return buffer */ 
10373         auio 
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, &uio_buf
[0], sizeof(uio_buf
)); 
10374         uio_addiov(auio
, searchblock
.returnbuffer
, searchblock
.returnbuffersize
); 
10377         if ((uap
->options 
& FSOPT_NOFOLLOW
) == 0) { 
10378                 nameiflags 
|= FOLLOW
; 
10380         NDINIT(&nd
, LOOKUP
, OP_SEARCHFS
, nameiflags 
| AUDITVNPATH1
, 
10381             UIO_USERSPACE
, uap
->path
, ctx
); 
10383         error 
= namei(&nd
); 
10391          * Switch to the root vnode for the volume 
10393         error 
= VFS_ROOT(vnode_mount(vp
), &tvp
, ctx
); 
10401          * If it's a union mount, the path lookup takes 
10402          * us to the top layer. But we may need to descend 
10403          * to a lower layer. For non-union mounts the layer 
10406         for (i 
= 0; i 
< (int) state
->ss_union_layer
; i
++) { 
10407                 if ((vp
->v_mount
->mnt_flag 
& MNT_UNION
) == 0) { 
10411                 vp 
= vp
->v_mount
->mnt_vnodecovered
; 
10417                 error 
= vnode_getwithref(vp
); 
10425         error 
= mac_vnode_check_searchfs(ctx
, vp
, &searchblock
.searchattrs
); 
10434          * If searchblock.maxmatches == 0, then skip the search. This has happened 
10435          * before and sometimes the underlying code doesnt deal with it well. 
10437         if (searchblock
.maxmatches 
== 0) { 
10443          * Allright, we have everything we need, so lets make that call. 
10445          * We keep special track of the return value from the file system: 
10446          * EAGAIN is an acceptable error condition that shouldn't keep us 
10447          * from copying out any results... 
10450         fserror 
= VNOP_SEARCHFS(vp
, 
10453             &searchblock
.searchattrs
, 
10454             (uint32_t)searchblock
.maxmatches
, 
10458             (uint32_t)uap
->scriptcode
, 
10459             (uint32_t)uap
->options
, 
10461             (struct searchstate 
*) &state
->ss_fsstate
, 
10465          * If it's a union mount we need to be called again 
10466          * to search the mounted-on filesystem. 
10468         if ((vp
->v_mount
->mnt_flag 
& MNT_UNION
) && fserror 
== 0) { 
10469                 state
->ss_union_flags 
= SRCHFS_START
; 
10470                 state
->ss_union_layer
++;        // search next layer down 
10478         /* Now copy out the stuff that needs copying out. That means the number of matches, the 
10479          *  search state.  Everything was already put into he return buffer by the vop call. */ 
10481         if ((error 
= copyout((caddr_t
) state
, uap
->state
, sizeof(struct searchstate
))) != 0) { 
10485         if ((error 
= suulong(uap
->nummatches
, (uint64_t)nummatches
)) != 0) { 
/* Frees the single allocation; searchparams1 is its base address. */
10493         kheap_free(KHEAP_DATA_BUFFERS
, searchparams1
, mallocsize
); 
10496 } /* end of searchfs system call */ 
10498 #else /* CONFIG_SEARCHFS */ 
/* Variant built when CONFIG_SEARCHFS is not set; all three arguments are unused. */
10501 searchfs(__unused proc_t p
, __unused 
struct searchfs_args 
*uap
, __unused 
int32_t *retval
) 
10506 #endif /* CONFIG_SEARCHFS */ 
10509 #if CONFIG_DATALESS_FILES 
10512  * === Namespace Resolver Up-call Mechanism === 
10514  * When I/O is performed to a dataless file or directory (read, write, 
10515  * lookup-in, etc.), the file system performs an upcall to the namespace 
10516  * resolver (filecoordinationd) to materialize the object. 
10518  * We need multiple up-calls to be in flight at once, and we need these 
10519  * up-calls to be interruptible, thus the following implementation: 
10521  * => The nspace_resolver_request represents the in-kernel request state. 
10522  *    It contains a request ID, storage space for the errno code returned 
10523  *    by filecoordinationd, and flags. 
10525  * => The request ID is simply a global monotonically incrementing 32-bit 
10526  *    number.  Outstanding requests are stored in a hash table, and the 
10527  *    hash function is extremely simple. 
10529  * => When an upcall is to be made to filecoordinationd, a request structure 
10530  *    is allocated on the stack (it is small, and needs to live only during 
10531  *    the duration of the call to resolve_nspace_item_ext()).  It is 
10532  *    initialized and inserted into the table.  Some backpressure from 
10533  *    filecoordinationd is applied by limiting the number of entries that 
10534  *    can be inserted into the table (and thus limiting the number of 
10535  *    outstanding requests issued to filecoordinationd); waiting for an 
10536  *    available slot is interruptible. 
10538  * => Once the request has been inserted into the table, the up-call is made 
10539  *    to filecoordinationd via a MiG-generated stub.  The up-call returns 
10540  *    immediately and filecoordinationd processes the request asynchronously. 
10542  * => The caller now waits for the request to complete.  This is achieved by 
10543  *    sleeping on the address of the request structure and waiting for 
10544  *    filecoordinationd to mark the request structure as complete.  This 
10545  *    is an interruptible sleep call; if interrupted, the request structure 
10546  *    is removed from the table and EINTR is returned to the caller.  If 
10547  *    this occurs, an advisory up-call is made to filecoordinationd with 
10548  *    the request ID to indicate that the request can be aborted or 
10549  *    de-prioritized at the discretion of filecoordinationd. 
10551  * => When filecoordinationd has completed the request, it signals completion 
10552  *    by writing to the vfs.nspace.complete sysctl node.  Only a process 
10553  *    decorated as a namespace resolver can write to this sysctl node.  The 
10554  *    value is a request ID / errno tuple passed as an array of 2 uint32_t's. 
10555  *    The request ID is looked up in the table, and if the request is found, 
10556  *    the error code is stored in the request structure and a wakeup() 
10557  *    issued on the address of the request structure.  If the request is not 
10558  *    found, we simply drop the completion notification, assuming that the 
10559  *    caller was interrupted. 
10561  * => When the waiting thread wakes up, it extracts the error code from the 
10562  *    request structure, removes the request from the table, and returns the 
10563  *    error code to the calling function.  Fini! 
10566 struct nspace_resolver_request 
{ 
10567         LIST_ENTRY(nspace_resolver_request
) r_hashlink
; 
10570         int             r_resolver_error
; 
10574 #define RRF_COMPLETE    0x0001 
/*
 * next_nspace_req_id
 *
 * Produce the ID for a new resolver request.  The counter is a
 * function-local static that is advanced with OSAddAtomic(); the value
 * handed back is whatever OSAddAtomic() returns.  No lock is taken in the
 * visible lines.
 */
10577 next_nspace_req_id(void) 
10579         static uint32_t next_req_id
; 
10581         return OSAddAtomic(1, &next_req_id
); 
10584 #define NSPACE_RESOLVER_REQ_HASHSIZE    32      /* XXX tune */ 
10585 #define NSPACE_RESOLVER_MAX_OUTSTANDING 256     /* XXX tune */ 
10587 static LIST_HEAD(nspace_resolver_requesthead
, 
10588     nspace_resolver_request
) * nspace_resolver_request_hashtbl
; 
10589 static u_long nspace_resolver_request_hashmask
; 
10590 static u_int nspace_resolver_request_count
; 
10591 static bool nspace_resolver_request_wait_slot
; 
10592 static LCK_GRP_DECLARE(nspace_resolver_request_lck_grp
, "file namespace resolver"); 
10593 static LCK_MTX_DECLARE(nspace_resolver_request_hash_mutex
, 
10594     &nspace_resolver_request_lck_grp
); 
10596 #define NSPACE_REQ_LOCK() \ 
10597         lck_mtx_lock(&nspace_resolver_request_hash_mutex) 
10598 #define NSPACE_REQ_UNLOCK() \ 
10599         lck_mtx_unlock(&nspace_resolver_request_hash_mutex) 
10601 #define NSPACE_RESOLVER_HASH(req_id)    \ 
10602         (&nspace_resolver_request_hashtbl[(req_id) & \ 
10603          nspace_resolver_request_hashmask]) 
/*
 * nspace_resolver_req_lookup
 *
 * Search the outstanding-request hash table for the entry whose r_req_id
 * equals req_id.  Only the bucket selected by NSPACE_RESOLVER_HASH(req_id)
 * is walked.  Other code in this file compares the result against NULL to
 * tell "found" from "not found".
 *
 * NOTE(review): the statements executed on a match and after the scan are
 * not visible in this excerpt, and nothing visible here takes the request
 * mutex -- presumably callers hold it; confirm.
 */
10605 static struct nspace_resolver_request 
* 
10606 nspace_resolver_req_lookup(uint32_t req_id
) 
10608         struct nspace_resolver_requesthead 
*bucket
; 
10609         struct nspace_resolver_request 
*req
; 
/* Pick the single bucket this ID hashes to. */
10611         bucket 
= NSPACE_RESOLVER_HASH(req_id
); 
/* Linear scan of that bucket's chain, comparing request IDs. */
10612         LIST_FOREACH(req
, bucket
, r_hashlink
) { 
10613                 if (req
->r_req_id 
== req_id
) { 
/*
 * nspace_resolver_req_add
 *
 * Link req into the outstanding-request hash table and increment
 * nspace_resolver_request_count.
 *
 * While the count is at or above NSPACE_RESOLVER_MAX_OUTSTANDING the
 * calling thread sleeps in msleep() on &nspace_resolver_request_count with
 * PCATCH set.  nspace_resolver_request_wait_slot is raised before sleeping;
 * nspace_resolver_req_remove() tests that flag to decide whether to issue
 * a wakeup() on the same channel.
 *
 * msleep() is handed nspace_resolver_request_hash_mutex, and the caller in
 * resolve_nspace_item_ext() calls NSPACE_REQ_UNLOCK() right after this
 * function returns, so the mutex is expected to be held on entry.
 *
 * NOTE(review): how msleep()'s result is handled, and the value returned
 * to the caller, are not visible in this excerpt.
 */
10622 nspace_resolver_req_add(struct nspace_resolver_request 
*req
) 
10624         struct nspace_resolver_requesthead 
*bucket
; 
/* Back-pressure: wait for a free slot before inserting. */
10627         while (nspace_resolver_request_count 
>= 
10628             NSPACE_RESOLVER_MAX_OUTSTANDING
) { 
10629                 nspace_resolver_request_wait_slot 
= true; 
10630                 error 
= msleep(&nspace_resolver_request_count
, 
10631                     &nspace_resolver_request_hash_mutex
, 
10632                     PVFS 
| PCATCH
, "nspacerq", NULL
); 
10638         bucket 
= NSPACE_RESOLVER_HASH(req
->r_req_id
); 
/* Diagnostic check: the ID must not already be present in the table. */
10640         assert(nspace_resolver_req_lookup(req
->r_req_id
) == NULL
); 
10641 #endif /* DIAGNOSTIC */ 
10642         LIST_INSERT_HEAD(bucket
, req
, r_hashlink
); 
10643         nspace_resolver_request_count
++; 
/*
 * nspace_resolver_req_remove
 *
 * Unlink req from its hash chain and decrement
 * nspace_resolver_request_count.  If a thread has flagged that it is
 * waiting for a free slot (nspace_resolver_request_wait_slot), clear the
 * flag and wakeup() the channel that nspace_resolver_req_add() sleeps on.
 *
 * Each call site visible in this file is followed by NSPACE_REQ_UNLOCK(),
 * so the request mutex is expected to be held on entry.
 *
 * NOTE(review): in the visible lines `bucket` is assigned but LIST_REMOVE()
 * does not take it; it appears to be unused outside the assignment.
 */
10649 nspace_resolver_req_remove(struct nspace_resolver_request 
*req
) 
10651         struct nspace_resolver_requesthead 
*bucket
; 
10653         bucket 
= NSPACE_RESOLVER_HASH(req
->r_req_id
); 
/* Diagnostic check: the request must currently be in the table. */
10655         assert(nspace_resolver_req_lookup(req
->r_req_id
) != NULL
); 
10656 #endif /* DIAGNOSTIC */ 
10657         LIST_REMOVE(req
, r_hashlink
); 
10658         nspace_resolver_request_count
--; 
/* A slot just opened up; let a waiter in nspace_resolver_req_add() retry. */
10660         if (nspace_resolver_request_wait_slot
) { 
10661                 nspace_resolver_request_wait_slot 
= false; 
10662                 wakeup(&nspace_resolver_request_count
); 
/*
 * nspace_resolver_req_cancel
 *
 * Send the advisory cancellation up-call for request req_id to
 * filecoordinationd: obtain the filecoordinationd port, call
 * send_nspace_resolve_cancel(), log a failed send, and release the port
 * with ipc_port_release_send().
 *
 * NOTE(review): what happens when the port lookup fails is not visible in
 * this excerpt.
 */
10667 nspace_resolver_req_cancel(uint32_t req_id
) 
10672         // Failures here aren't fatal -- the cancellation message 
10673         // sent to the resolver is merely advisory. 
10675         kr 
= host_get_filecoordinationd_port(host_priv_self(), &mp
); 
10676         if (kr 
!= KERN_SUCCESS 
|| !IPC_PORT_VALID(mp
)) { 
10680         kr 
= send_nspace_resolve_cancel(mp
, req_id
); 
/* A failed send is only logged. */
10681         if (kr 
!= KERN_SUCCESS
) { 
10682                 os_log_error(OS_LOG_DEFAULT
, 
10683                     "NSPACE send_nspace_resolve_cancel failure: %d", kr
); 
10686         ipc_port_release_send(mp
); 
/*
 * nspace_resolver_req_wait
 *
 * Wait for req to be marked RRF_COMPLETE, then remove it from the table,
 * drop the request mutex, and return req->r_resolver_error.
 *
 * The sleep is on the address of req, with PCATCH set and
 * nspace_resolver_request_hash_mutex passed to msleep().  When msleep()
 * reports an error other than ERESTART, r_resolver_error is overwritten
 * (EINTR is passed through as EINTR) and a cancellation message is
 * scheduled; it is sent only after NSPACE_REQ_UNLOCK().
 *
 * NOTE(review): the value stored for non-EINTR errors, the statement that
 * leaves the loop after such an error, and the point where the mutex is
 * acquired are not visible in this excerpt.
 */
10690 nspace_resolver_req_wait(struct nspace_resolver_request 
*req
) 
10692         bool send_cancel_message 
= false; 
/* Loop until the completion flag is set on the request. */
10697         while ((req
->r_flags 
& RRF_COMPLETE
) == 0) { 
10698                 error 
= msleep(req
, &nspace_resolver_request_hash_mutex
, 
10699                     PVFS 
| PCATCH
, "nspace", NULL
); 
10700                 if (error 
&& error 
!= ERESTART
) { 
10701                         req
->r_resolver_error 
= (error 
== EINTR
) ? EINTR 
: 
10703                         send_cancel_message 
= true; 
10708         nspace_resolver_req_remove(req
); 
10710         NSPACE_REQ_UNLOCK(); 
/* The cancel up-call is issued after the mutex has been released. */
10712         if (send_cancel_message
) { 
10713                 nspace_resolver_req_cancel(req
->r_req_id
); 
10716         return req
->r_resolver_error
; 
/*
 * nspace_resolver_req_mark_complete
 *
 * Record the outcome on the request: store resolver_error in
 * r_resolver_error and set RRF_COMPLETE in r_flags, which is the condition
 * nspace_resolver_req_wait() loops on.
 *
 * NOTE(review): any statement following the flag update is not visible in
 * this excerpt.
 */
10720 nspace_resolver_req_mark_complete( 
10721         struct nspace_resolver_request 
*req
, 
10722         int resolver_error
) 
10724         req
->r_resolver_error 
= resolver_error
; 
10725         req
->r_flags 
|= RRF_COMPLETE
; 
10730 nspace_resolver_req_completed(uint32_t req_id
, int resolver_error
, uint64_t orig_gencount
) 
10732         struct nspace_resolver_request 
*req
; 
10736         // If we don't find the request corresponding to our req_id, 
10737         // just drop the completion signal on the floor; it's likely 
10738         // that the requester interrupted with a signal. 
10740         req 
= nspace_resolver_req_lookup(req_id
); 
10742                 mount_t locked_mp 
= NULL
; 
10744                 locked_mp 
= req
->r_vp
->v_mount
; 
10745                 mount_ref(locked_mp
, 0); 
10746                 mount_lock_renames(locked_mp
); 
10749                 // if the resolver isn't already returning an error and we have an 
10750                 // orig_gencount, then get an iocount on the request vnode and check 
10751                 // that the gencount on req->r_vp has not changed. 
10753                 // note: a ref was taken on req->r_vp when the request was created 
10754                 // and that ref will be dropped by that thread when it wakes up. 
10756                 if (resolver_error 
== 0 && 
10757                     orig_gencount 
!= 0 && 
10758                     vnode_getwithref(req
->r_vp
) == 0) { 
10759                         struct vnode_attr va
; 
10760                         uint64_t cur_gencount
; 
10763                         VATTR_WANTED(&va
, va_recursive_gencount
); 
10765                         if (vnode_getattr(req
->r_vp
, &va
, vfs_context_kernel()) == 0) { 
10766                                 cur_gencount 
= va
.va_recursive_gencount
; 
10771                         if (resolver_error 
== 0 && cur_gencount 
&& orig_gencount 
&& cur_gencount 
!= orig_gencount
) { 
10772                                 printf("nspace.complete: gencount changed! (orig %llu cur %llu)\n", orig_gencount
, cur_gencount
); 
10774                                 // this error will be returned to the thread that initiated the 
10775                                 // materialization of req->r_vp. 
10776                                 resolver_error 
= EBUSY
; 
10778                                 // note: we explicitly do not return an error to the caller (i.e. 
10779                                 // the thread that did the materialization) because they said they 
10783                         vnode_put(req
->r_vp
); 
10786                 mount_unlock_renames(locked_mp
); 
10787                 mount_drop(locked_mp
, 0); 
10789                 nspace_resolver_req_mark_complete(req
, resolver_error
); 
10792         NSPACE_REQ_UNLOCK(); 
10797 static struct proc 
*nspace_resolver_proc
; 
/*
 * nspace_resolver_get_proc_state
 *
 * Report through *is_resolver (1 or 0) whether p is the registered
 * namespace resolver.  Both conditions must hold: p carries
 * P_LNSPACE_RESOLVER in p_lflag, and p is the process recorded in
 * nspace_resolver_proc.
 *
 * NOTE(review): the function's return statement is not visible in this
 * excerpt.
 */
10800 nspace_resolver_get_proc_state(struct proc 
*p
, int *is_resolver
) 
10802         *is_resolver 
= ((p
->p_lflag 
& P_LNSPACE_RESOLVER
) && 
10803             p 
== nspace_resolver_proc
) ? 1 : 0; 
10808 nspace_resolver_set_proc_state(struct proc 
*p
, int is_resolver
) 
10810         vfs_context_t ctx 
= vfs_context_current(); 
10814         // The system filecoordinationd runs as uid == 0.  This also 
10815         // has the nice side-effect of filtering out filecoordinationd 
10816         // running in the simulator. 
10818         if (!vfs_context_issuser(ctx
)) { 
10822         error 
= priv_check_cred(vfs_context_ucred(ctx
), 
10823             PRIV_VFS_DATALESS_RESOLVER
, 0); 
10831                 if (nspace_resolver_proc 
== NULL
) { 
10833                         p
->p_lflag 
|= P_LNSPACE_RESOLVER
; 
10835                         nspace_resolver_proc 
= p
; 
10840                 NSPACE_REQ_UNLOCK(); 
10842                 // This is basically just like the exit case. 
10843                 // nspace_resolver_exited() will verify that the 
10844                 // process is the resolver, and will clear the 
10846                 nspace_resolver_exited(p
); 
10853 nspace_materialization_get_proc_state(struct proc 
*p
, int *is_prevented
) 
10855         if ((p
->p_lflag 
& P_LNSPACE_RESOLVER
) != 0 || 
10856             (p
->p_vfs_iopolicy 
& 
10857             P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) == 0) { 
10866 nspace_materialization_set_proc_state(struct proc 
*p
, int is_prevented
) 
10868         if (p
->p_lflag 
& P_LNSPACE_RESOLVER
) { 
10869                 return is_prevented 
? 0 : EBUSY
; 
10872         if (is_prevented
) { 
10873                 OSBitAndAtomic16(~((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
), &p
->p_vfs_iopolicy
); 
10875                 OSBitOrAtomic16((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
, &p
->p_vfs_iopolicy
); 
/*
 * nspace_materialization_get_thread_state
 *
 * Report through *is_prevented (1 or 0) whether the current thread has
 * UT_NSPACE_NODATALESSFAULTS set in its uthread uu_flag.
 */
10881 nspace_materialization_get_thread_state(int *is_prevented
) 
10883         uthread_t ut 
= get_bsdthread_info(current_thread()); 
10885         *is_prevented 
= (ut
->uu_flag 
& UT_NSPACE_NODATALESSFAULTS
) ? 1 : 0; 
/*
 * nspace_materialization_set_thread_state
 *
 * Set UT_NSPACE_NODATALESSFAULTS in the current thread's uu_flag when
 * is_prevented is non-zero; the other visible assignment clears the same
 * bit.
 *
 * NOTE(review): the `else` line separating the two assignments and the
 * return statement are not visible in this excerpt.
 */
10890 nspace_materialization_set_thread_state(int is_prevented
) 
10892         uthread_t ut 
= get_bsdthread_info(current_thread()); 
10894         if (is_prevented
) { 
10895                 ut
->uu_flag 
|= UT_NSPACE_NODATALESSFAULTS
; 
10897                 ut
->uu_flag 
&= ~UT_NSPACE_NODATALESSFAULTS
; 
10902 /* the vfs.nspace branch */ 
10903 SYSCTL_NODE(_vfs
, OID_AUTO
, nspace
, CTLFLAG_RW 
| CTLFLAG_LOCKED
, NULL
, "vfs nspace hinge"); 
10906 sysctl_nspace_resolver(__unused 
struct sysctl_oid 
*oidp
, 
10907     __unused 
void *arg1
, __unused 
int arg2
, struct sysctl_req 
*req
) 
10909         struct proc 
*p 
= req
->p
; 
10910         int new_value
, old_value
, changed 
= 0; 
10913         error 
= nspace_resolver_get_proc_state(p
, &old_value
); 
10918         error 
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
, 
10920         if (error 
== 0 && changed
) { 
10921                 error 
= nspace_resolver_set_proc_state(p
, new_value
); 
10926 /* decorate this process as the dataless file resolver */ 
10927 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, resolver
, 
10928     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_ANYBODY 
| CTLFLAG_LOCKED
, 
10929     0, 0, sysctl_nspace_resolver
, "I", ""); 
10932 sysctl_nspace_prevent_materialization(__unused 
struct sysctl_oid 
*oidp
, 
10933     __unused 
void *arg1
, __unused 
int arg2
, struct sysctl_req 
*req
) 
10935         struct proc 
*p 
= req
->p
; 
10936         int new_value
, old_value
, changed 
= 0; 
10939         error 
= nspace_materialization_get_proc_state(p
, &old_value
); 
10944         error 
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
, 
10946         if (error 
== 0 && changed
) { 
10947                 error 
= nspace_materialization_set_proc_state(p
, new_value
); 
10952 /* decorate this process as not wanting to materialize dataless files */ 
10953 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, prevent_materialization
, 
10954     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_ANYBODY 
| CTLFLAG_LOCKED
, 
10955     0, 0, sysctl_nspace_prevent_materialization
, "I", ""); 
10958 sysctl_nspace_thread_prevent_materialization(__unused 
struct sysctl_oid 
*oidp
, 
10959     __unused 
void *arg1
, __unused 
int arg2
, struct sysctl_req 
*req
) 
10961         int new_value
, old_value
, changed 
= 0; 
10964         error 
= nspace_materialization_get_thread_state(&old_value
); 
10969         error 
= sysctl_io_number(req
, old_value
, sizeof(int), &new_value
, 
10971         if (error 
== 0 && changed
) { 
10972                 error 
= nspace_materialization_set_thread_state(new_value
); 
10977 /* decorate this thread as not wanting to materialize dataless files */ 
10978 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, thread_prevent_materialization
, 
10979     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_ANYBODY 
| CTLFLAG_LOCKED
, 
10980     0, 0, sysctl_nspace_thread_prevent_materialization
, "I", ""); 
10983 sysctl_nspace_complete(__unused 
struct sysctl_oid 
*oidp
, __unused 
void *arg1
, 
10984     __unused 
int arg2
, struct sysctl_req 
*req
) 
10986         struct proc 
*p 
= req
->p
; 
10987         uint32_t req_status
[2] = { 0, 0 }; 
10988         uint64_t gencount 
= 0; 
10989         int error
, is_resolver
, changed 
= 0, gencount_changed
; 
10991         error 
= nspace_resolver_get_proc_state(p
, &is_resolver
); 
10996         if (!is_resolver
) { 
11000         error 
= sysctl_io_opaque(req
, req_status
, sizeof(req_status
), 
11006         // get the gencount if it was passed 
11007         error 
= sysctl_io_opaque(req
, &gencount
, sizeof(gencount
), 
11008             &gencount_changed
); 
11011                 // we ignore the error because the gencount was optional 
11016          * req_status[0] is the req_id 
11018          * req_status[1] is the errno 
11020         if (error 
== 0 && changed
) { 
11021                 nspace_resolver_req_completed(req_status
[0], 
11022                     (int)req_status
[1], gencount
); 
11027 /* Resolver reports completed reqs here. */ 
11028 SYSCTL_PROC(_vfs_nspace
, OID_AUTO
, complete
, 
11029     CTLTYPE_OPAQUE 
| CTLFLAG_RW 
| CTLFLAG_ANYBODY 
| CTLFLAG_LOCKED
, 
11030     0, 0, sysctl_nspace_complete
, "-", ""); 
11032 #endif /* CONFIG_DATALESS_FILES */ 
11034 #if CONFIG_DATALESS_FILES 
11035 #define __no_dataless_unused    /* nothing */ 
11037 #define __no_dataless_unused    __unused 
11041 vfs_context_dataless_materialization_is_prevented( 
11042         vfs_context_t 
const ctx __no_dataless_unused
) 
11044 #if CONFIG_DATALESS_FILES 
11045         proc_t 
const p 
= vfs_context_proc(ctx
); 
11046         thread_t 
const t 
= vfs_context_thread(ctx
); 
11047         uthread_t 
const ut 
= t 
? get_bsdthread_info(t
) : NULL
; 
11050          * Kernel context ==> return EDEADLK, as we would with any random 
11051          * process decorated as no-materialize. 
11053         if (ctx 
== vfs_context_kernel()) { 
11058          * If the process has the dataless-manipulation entitlement, 
11059          * materialization is prevented, and depending on the kind 
11060          * of file system operation, things get to proceed as if the 
11061          * object is not dataless. 
11063         if (vfs_context_is_dataless_manipulator(ctx
)) { 
11064                 return EJUSTRETURN
; 
11068          * Per-thread decorations override any process-wide decorations. 
11069          * (Foundation uses this, and this overrides even the dataless- 
11070          * manipulation entitlement so as to make API contracts consistent.) 
11073                 if (ut
->uu_flag 
& UT_NSPACE_NODATALESSFAULTS
) { 
11076                 if (ut
->uu_flag 
& UT_NSPACE_FORCEDATALESSFAULTS
) { 
11082          * If the process's iopolicy specifies that dataless files 
11083          * can be materialized, then we let it go ahead. 
11085         if (p
->p_vfs_iopolicy 
& P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES
) { 
11088 #endif /* CONFIG_DATALESS_FILES */ 
11091          * The default behavior is to not materialize dataless files; 
11092          * return to the caller that deadlock was detected. 
11098 nspace_resolver_init(void) 
11100 #if CONFIG_DATALESS_FILES 
11101         nspace_resolver_request_hashtbl 
= 
11102             hashinit(NSPACE_RESOLVER_REQ_HASHSIZE
, 
11103             M_VNODE 
/* XXX */, &nspace_resolver_request_hashmask
); 
11104 #endif /* CONFIG_DATALESS_FILES */ 
11108 nspace_resolver_exited(struct proc 
*p __no_dataless_unused
) 
11110 #if CONFIG_DATALESS_FILES 
11111         struct nspace_resolver_requesthead 
*bucket
; 
11112         struct nspace_resolver_request 
*req
; 
11117         if ((p
->p_lflag 
& P_LNSPACE_RESOLVER
) && 
11118             p 
== nspace_resolver_proc
) { 
11119                 for (idx 
= 0; idx 
<= nspace_resolver_request_hashmask
; idx
++) { 
11120                         bucket 
= &nspace_resolver_request_hashtbl
[idx
]; 
11121                         LIST_FOREACH(req
, bucket
, r_hashlink
) { 
11122                                 nspace_resolver_req_mark_complete(req
, 
11126                 nspace_resolver_proc 
= NULL
; 
11129         NSPACE_REQ_UNLOCK(); 
11130 #endif /* CONFIG_DATALESS_FILES */ 
/*
 * resolve_nspace_item
 *
 * Convenience wrapper: forwards vp and op to resolve_nspace_item_ext()
 * with NULL as the third argument and returns its result.
 */
11134 resolve_nspace_item(struct vnode 
*vp
, uint64_t op
) 
11136         return resolve_nspace_item_ext(vp
, op
, NULL
); 
11139 #define DATALESS_RESOLVER_ENTITLEMENT     \ 
11140         "com.apple.private.vfs.dataless-resolver" 
11141 #define DATALESS_MANIPULATION_ENTITLEMENT \ 
11142         "com.apple.private.vfs.dataless-manipulation" 
11145  * Return TRUE if the vfs context is associated with a process entitled 
11146  * for dataless manipulation. 
11148  * XXX Arguably belongs in vfs_subr.c, but is here because of the 
11149  * complication around CONFIG_DATALESS_FILES. 
11152 vfs_context_is_dataless_manipulator(vfs_context_t ctx __unused
) 
11154 #if CONFIG_DATALESS_FILES 
11155         assert(ctx
->vc_thread 
== current_thread()); 
11156         task_t 
const task 
= current_task(); 
11157         return IOTaskHasEntitlement(task
, DATALESS_MANIPULATION_ENTITLEMENT
) || 
11158                IOTaskHasEntitlement(task
, DATALESS_RESOLVER_ENTITLEMENT
); 
11161 #endif /* CONFIG_DATALESS_FILES */ 
11165 resolve_nspace_item_ext( 
11166         struct vnode 
*vp __no_dataless_unused
, 
11167         uint64_t op __no_dataless_unused
, 
11168         void *arg __unused
) 
11170 #if CONFIG_DATALESS_FILES 
11176         struct nspace_resolver_request req
; 
11178         // only allow namespace events on regular files, directories and symlinks. 
11179         if (vp
->v_type 
!= VREG 
&& vp
->v_type 
!= VDIR 
&& vp
->v_type 
!= VLNK
) { 
11184         // if this is a snapshot event and the vnode is on a 
11185         // disk image just pretend nothing happened since any 
11186         // change to the disk image will cause the disk image 
11187         // itself to get backed up and this avoids multi-way 
11188         // deadlocks between the snapshot handler and the ever 
11189         // popular diskimages-helper process.  the variable 
11190         // nspace_allow_virtual_devs allows this behavior to 
11191         // be overridden (for use by the Mobile TimeMachine 
11192         // testing infrastructure which uses disk images) 
11194         if (op 
& NAMESPACE_HANDLER_SNAPSHOT_EVENT
) { 
11195                 os_log_debug(OS_LOG_DEFAULT
, "NSPACE SNAPSHOT not handled"); 
11199         error 
= vfs_context_dataless_materialization_is_prevented( 
11200                 vfs_context_current()); 
11202                 os_log_debug(OS_LOG_DEFAULT
, 
11203                     "NSPACE process/thread is decorated as no-materialization"); 
11207         kr 
= host_get_filecoordinationd_port(host_priv_self(), &mp
); 
11208         if (kr 
!= KERN_SUCCESS 
|| !IPC_PORT_VALID(mp
)) { 
11209                 os_log_error(OS_LOG_DEFAULT
, "NSPACE no port"); 
11210                 // Treat this like being unable to access the backing 
11215         path 
= zalloc(ZV_NAMEI
); 
11216         path_len 
= MAXPATHLEN
; 
11218         error 
= vn_getpath(vp
, path
, &path_len
); 
11220                 int xxx_rdar44371223
;   /* XXX Mig bug */ 
11221                 req
.r_req_id 
= next_nspace_req_id(); 
11222                 req
.r_resolver_error 
= 0; 
11225                 if ((error 
= vnode_ref(vp
)) == 0) {     // take a ref so that the vnode doesn't go away 
11228                         goto out_release_port
; 
11232                 error 
= nspace_resolver_req_add(&req
); 
11233                 NSPACE_REQ_UNLOCK(); 
11235                         vnode_rele(req
.r_vp
); 
11236                         goto out_release_port
; 
11239                 os_log_debug(OS_LOG_DEFAULT
, "NSPACE resolve_path call"); 
11240                 kr 
= send_nspace_resolve_path(mp
, req
.r_req_id
, 
11241                     current_proc()->p_pid
, (uint32_t)(op 
& 0xffffffff), 
11242                     path
, &xxx_rdar44371223
); 
11243                 if (kr 
!= KERN_SUCCESS
) { 
11244                         // Also treat this like being unable to access 
11245                         // the backing store server. 
11246                         os_log_error(OS_LOG_DEFAULT
, 
11247                             "NSPACE resolve_path failure: %d", kr
); 
11251                         nspace_resolver_req_remove(&req
); 
11252                         NSPACE_REQ_UNLOCK(); 
11253                         vnode_rele(req
.r_vp
); 
11254                         goto out_release_port
; 
11257                 // Give back the memory we allocated earlier while 
11258                 // we wait; we no longer need it. 
11259                 zfree(ZV_NAMEI
, path
); 
11262                 // Request has been submitted to the resolver. 
11263                 // Now (interruptibly) wait for completion. 
11264                 // Upon return, the request will have been removed 
11265                 // from the lookup table. 
11266                 error 
= nspace_resolver_req_wait(&req
); 
11268                 vnode_rele(req
.r_vp
); 
11272         if (path 
!= NULL
) { 
11273                 zfree(ZV_NAMEI
, path
); 
11275         ipc_port_release_send(mp
); 
11280 #endif /* CONFIG_DATALESS_FILES */ 
11284 nspace_snapshot_event(__unused vnode_t vp
, __unused  
time_t ctime
, 
11285     __unused 
uint64_t op_type
, __unused 
void *arg
) 
/*
 * build_volfs_path
 *
 * Format a "/.vol/<fsid>/<fileid>" path for vp into path.  va_fsid and
 * va_fileid are requested through vnode_getattr() using the kernel
 * context; if that call fails, a fixed placeholder path is written
 * instead.
 *
 * On entry *len is passed to snprintf() as the buffer size.  On exit *len
 * is snprintf()'s return value plus one.
 *
 * NOTE(review): snprintf() returns the length the output would have had,
 * so after truncation the stored *len can exceed the buffer size that was
 * passed in -- confirm callers tolerate that.
 * NOTE(review): va_fileid is printed with %lld; confirm the field's
 * signedness matches the conversion.
 */
11292 build_volfs_path(struct vnode 
*vp
, char *path
, int *len
) 
11294         struct vnode_attr va
; 
11298         VATTR_WANTED(&va
, va_fsid
); 
11299         VATTR_WANTED(&va
, va_fileid
); 
11301         if (vnode_getattr(vp
, &va
, vfs_context_kernel()) != 0) { 
/* Attribute lookup failed: emit the placeholder path. */
11302                 *len 
= snprintf(path
, *len
, "/non/existent/path/because/vnode_getattr/failed") + 1; 
11305                 *len 
= snprintf(path
, *len
, "/.vol/%d/%lld", (dev_t
)va
.va_fsid
, va
.va_fileid
) + 1; 
/*
 * fsctl_bogus_command_compat
 *
 * Compatibility mapping for fsctl commands: when cmd equals the
 * IOCBASECMD() form of one of the commands listed below, the full command
 * constant is returned in its place.  fsctl_internal() passes every
 * incoming cmd through this function before reading IOCPARM_LEN(cmd).
 *
 * NOTE(review): the switch header and the result for a cmd that matches no
 * case are not visible in this excerpt.
 */
11313 static unsigned long 
11314 fsctl_bogus_command_compat(unsigned long cmd
) 
11317         case IOCBASECMD(FSIOC_SYNC_VOLUME
): 
11318                 return FSIOC_SYNC_VOLUME
; 
11319         case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID
): 
11320                 return FSIOC_ROUTEFS_SETROUTEID
; 
11321         case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS
): 
11322                 return FSIOC_SET_PACKAGE_EXTS
; 
11323         case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE
): 
11324                 return FSIOC_SET_FSTYPENAME_OVERRIDE
; 
11325         case IOCBASECMD(DISK_CONDITIONER_IOC_GET
): 
11326                 return DISK_CONDITIONER_IOC_GET
; 
11327         case IOCBASECMD(DISK_CONDITIONER_IOC_SET
): 
11328                 return DISK_CONDITIONER_IOC_SET
; 
11329         case IOCBASECMD(FSIOC_FIOSEEKHOLE
): 
11330                 return FSIOC_FIOSEEKHOLE
; 
11331         case IOCBASECMD(FSIOC_FIOSEEKDATA
): 
11332                 return FSIOC_FIOSEEKDATA
; 
11333         case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME
): 
11334                 return SPOTLIGHT_IOC_GET_MOUNT_TIME
; 
11335         case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME
): 
11336                 return SPOTLIGHT_IOC_GET_LAST_MTIME
; 
/*
 * cas_bsdflags_setattr
 *
 * Issue the FSIOC_CAS_BSDFLAGS ioctl on vp with FWRITE, passing arg
 * through unchanged, and return VNOP_IOCTL()'s result.  handle_flags()
 * hands this function to chflags0() as its callback argument.
 */
11343 cas_bsdflags_setattr(vnode_t vp
, void *arg
, vfs_context_t ctx
) 
11345         return VNOP_IOCTL(vp
, FSIOC_CAS_BSDFLAGS
, arg
, FWRITE
, ctx
); 
11348 static int __attribute__((noinline
)) 
11349 handle_sync_volume(vnode_t vp
, vnode_t 
*arg_vp
, caddr_t data
, vfs_context_t ctx
) 
11351         struct vfs_attr vfa
; 
11352         mount_t mp 
= vp
->v_mount
; 
11356         /* record vid of vp so we can drop it below. */ 
11357         uint32_t vvid 
= vp
->v_id
; 
11360          * Then grab mount_iterref so that we can release the vnode. 
11361          * Without this, a thread may call vnode_iterate_prepare then 
11362          * get into a deadlock because we've never released the root vp 
11364         error 
= mount_iterref(mp
, 0); 
11371         if (*(uint32_t*)data 
& FSCTL_SYNC_WAIT
) { 
11376          * If the filesystem supports multiple filesystems in a 
11377          * partition (e.g. APFS volumes in a container), it knows 
11378          * that the waitfor argument to VFS_SYNC is a set of flags. 
11380         VFSATTR_INIT(&vfa
); 
11381         VFSATTR_WANTED(&vfa
, f_capabilities
); 
11382         if ((vfs_getattr(mp
, &vfa
, vfs_context_current()) == 0) && 
11383             VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) && 
11384             ((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
)) && 
11385             ((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_SHARED_SPACE
))) { 
11389         /* issue the sync for this volume */ 
11390         (void)sync_callback(mp
, &arg
); 
11393          * Then release the mount_iterref once we're done syncing; it's not 
11394          * needed for the VNOP_IOCTL below 
11396         mount_iterdrop(mp
); 
11398         if (arg 
& FSCTL_SYNC_FULLSYNC
) { 
11399                 /* re-obtain vnode iocount on the root vp, if possible */ 
11400                 error 
= vnode_getwithvid(vp
, vvid
); 
11402                         error 
= VNOP_IOCTL(vp
, F_FULLFSYNC
, (caddr_t
)NULL
, 0, ctx
); 
11406         /* mark the argument VP as having been released */ 
11412 static int __attribute__((noinline
)) 
11413 handle_routes(user_addr_t udata
) 
11415         char routepath
[MAXPATHLEN
]; 
11419         if ((error 
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) { 
11422         bzero(routepath
, MAXPATHLEN
); 
11423         error 
= copyinstr(udata
, &routepath
[0], MAXPATHLEN
, &len
); 
11427         error 
= routefs_kernel_mount(routepath
); 
11432 static int __attribute__((noinline
)) 
11433 handle_flags(vnode_t vp
, caddr_t data
, vfs_context_t ctx
) 
11435         struct fsioc_cas_bsdflags 
*cas 
= (struct fsioc_cas_bsdflags 
*)data
; 
11436         struct vnode_attr va
; 
11440         VATTR_SET(&va
, va_flags
, cas
->new_flags
); 
11442         error 
= chflags0(vp
, &va
, cas_bsdflags_setattr
, cas
, ctx
); 
11446 static int __attribute__((noinline
)) 
11447 handle_auth(vnode_t vp
, u_long cmd
, caddr_t data
, u_long options
, vfs_context_t ctx
) 
11449         struct mount 
*mp 
= NULL
; 
11450         errno_t rootauth 
= 0; 
11455          * query the underlying FS and see if it reports something 
11456          * sane for this vnode. If volume is authenticated via 
11457          * chunklist, leave that for the caller to determine. 
11459         rootauth 
= VNOP_IOCTL(vp
, cmd
, data
, (int)options
, ctx
); 
11465  * Make a filesystem-specific control call: 
11469 fsctl_internal(proc_t p
, vnode_t 
*arg_vp
, u_long cmd
, user_addr_t udata
, u_long options
, vfs_context_t ctx
) 
11474 #define STK_PARAMS 128 
11475         char stkbuf
[STK_PARAMS
] = {0}; 
11476         caddr_t data
, memp
; 
11477         vnode_t vp 
= *arg_vp
; 
11479         if (vp
->v_type 
== VCHR 
|| vp
->v_type 
== VBLK
) { 
11483         cmd 
= fsctl_bogus_command_compat(cmd
); 
11485         size 
= IOCPARM_LEN(cmd
); 
11486         if (size 
> IOCPARM_MAX
) { 
11490         is64bit 
= proc_is64bit(p
); 
11494         if (size 
> sizeof(stkbuf
)) { 
11495                 if ((memp 
= (caddr_t
)kheap_alloc(KHEAP_TEMP
, size
, Z_WAITOK
)) == 0) { 
11503         if (cmd 
& IOC_IN
) { 
11505                         error 
= copyin(udata
, data
, size
); 
11508                                         kheap_free(KHEAP_TEMP
, memp
, size
); 
11514                                 *(user_addr_t 
*)data 
= udata
; 
11516                                 *(uint32_t *)data 
= (uint32_t)udata
; 
11519         } else if ((cmd 
& IOC_OUT
) && size
) { 
11521                  * Zero the buffer so the user always 
11522                  * gets back something deterministic. 
11525         } else if (cmd 
& IOC_VOID
) { 
11527                         *(user_addr_t 
*)data 
= udata
; 
11529                         *(uint32_t *)data 
= (uint32_t)udata
; 
11533         /* Check to see if it's a generic command */ 
11535         case FSIOC_SYNC_VOLUME
: 
11536                 error 
= handle_sync_volume(vp
, arg_vp
, data
, ctx
); 
11539         case FSIOC_ROUTEFS_SETROUTEID
: 
11541                 error 
= handle_routes(udata
); 
11545         case FSIOC_SET_PACKAGE_EXTS
: { 
11546                 user_addr_t ext_strings
; 
11547                 uint32_t    num_entries
; 
11548                 uint32_t    max_width
; 
11550                 if ((error 
= priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS
, 0))) { 
11554                 if ((is64bit 
&& size 
!= sizeof(user64_package_ext_info
)) 
11555                     || (is64bit 
== 0 && size 
!= sizeof(user32_package_ext_info
))) { 
11556                         // either you're 64-bit and passed a 64-bit struct or 
11557                         // you're 32-bit and passed a 32-bit struct.  otherwise 
11564                         if (sizeof(user64_addr_t
) > sizeof(user_addr_t
)) { 
11565                                 assert(((user64_package_ext_info 
*)data
)->strings 
<= UINT32_MAX
); 
11567                         ext_strings 
= (user_addr_t
)((user64_package_ext_info 
*)data
)->strings
; 
11568                         num_entries 
= ((user64_package_ext_info 
*)data
)->num_entries
; 
11569                         max_width   
= ((user64_package_ext_info 
*)data
)->max_width
; 
11571                         ext_strings 
= CAST_USER_ADDR_T(((user32_package_ext_info 
*)data
)->strings
); 
11572                         num_entries 
= ((user32_package_ext_info 
*)data
)->num_entries
; 
11573                         max_width   
= ((user32_package_ext_info 
*)data
)->max_width
; 
11575                 error 
= set_package_extensions_table(ext_strings
, num_entries
, max_width
); 
11579         case FSIOC_SET_FSTYPENAME_OVERRIDE
: 
11581                 if ((error 
= suser(kauth_cred_get(), &(current_proc()->p_acflag
)))) { 
11585                         mount_lock(vp
->v_mount
); 
11586                         if (data
[0] != 0) { 
11588                                 for (i 
= 0; i 
< MFSTYPENAMELEN
; i
++) { 
11590                                                 goto continue_copy
; 
11594                                  * Getting here means we have a user data string which has no 
11595                                  * NULL termination in its first MFSTYPENAMELEN bytes. 
11596                                  * This is bogus, let's avoid strlcpy-ing the read data and 
11602                                 strlcpy(&vp
->v_mount
->fstypename_override
[0], data
, MFSTYPENAMELEN
); 
11603                                 vp
->v_mount
->mnt_kern_flag 
|= MNTK_TYPENAME_OVERRIDE
; 
11604                                 if (vfs_isrdonly(vp
->v_mount
) && strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) { 
11605                                         vp
->v_mount
->mnt_kern_flag 
|= MNTK_EXTENDED_SECURITY
; 
11606                                         vp
->v_mount
->mnt_kern_flag 
&= ~MNTK_AUTH_OPAQUE
; 
11609                                 if (strcmp(vp
->v_mount
->fstypename_override
, "mtmfs") == 0) { 
11610                                         vp
->v_mount
->mnt_kern_flag 
&= ~MNTK_EXTENDED_SECURITY
; 
11612                                 vp
->v_mount
->mnt_kern_flag 
&= ~MNTK_TYPENAME_OVERRIDE
; 
11613                                 vp
->v_mount
->fstypename_override
[0] = '\0'; 
11616                         mount_unlock(vp
->v_mount
); 
11621         case DISK_CONDITIONER_IOC_GET
: { 
11622                 error 
= disk_conditioner_get_info(vp
->v_mount
, (disk_conditioner_info 
*)data
); 
11626         case DISK_CONDITIONER_IOC_SET
: { 
11627                 error 
= disk_conditioner_set_info(vp
->v_mount
, (disk_conditioner_info 
*)data
); 
11631         case FSIOC_CAS_BSDFLAGS
: 
11632                 error 
= handle_flags(vp
, data
, ctx
); 
11635         case FSIOC_FD_ONLY_OPEN_ONCE
: { 
11637                 if (vnode_usecount(vp
) > 1) { 
11638                         vnode_lock_spin(vp
); 
11639                         if (vp
->v_lflag 
& VL_HASSTREAMS
) { 
11640                                 if (vnode_isinuse_locked(vp
, 1, 1)) { 
11643                         } else if (vnode_usecount(vp
) > 1) { 
11651         case FSIOC_EVAL_ROOTAUTH
: 
11652                 error 
= handle_auth(vp
, cmd
, data
, options
, ctx
); 
11656                 /* other, known commands shouldn't be passed down here */ 
11659                 case F_TRIM_ACTIVE_FILE
: 
11661                 case F_TRANSCODEKEY
: 
11662                 case F_GETPROTECTIONLEVEL
: 
11663                 case F_GETDEFAULTPROTLEVEL
: 
11664                 case F_MAKECOMPRESSED
: 
11665                 case F_SET_GREEDY_MODE
: 
11666                 case F_SETSTATICCONTENT
: 
11668                 case F_SETBACKINGSTORE
: 
11669                 case F_GETPATH_MTMINFO
: 
11670                 case APFSIOC_REVERT_TO_SNAPSHOT
: 
11671                 case FSIOC_FIOSEEKHOLE
: 
11672                 case FSIOC_FIOSEEKDATA
: 
11673                 case HFS_GET_BOOT_INFO
: 
11674                 case HFS_SET_BOOT_INFO
: 
11678                 case F_BARRIERFSYNC
: 
11681                 case FSIOC_KERNEL_ROOTAUTH
: 
11685                 /* Invoke the filesystem-specific code */ 
11686                 error 
= VNOP_IOCTL(vp
, cmd
, data
, (int)options
, ctx
); 
11688         } /* end switch stmt */ 
11691          * if no errors, copy any data to user. Size was 
11692          * already set and checked above. 
11694         if (error 
== 0 && (cmd 
& IOC_OUT
) && size
) { 
11695                 error 
= copyout(data
, udata
, size
); 
11700                 kheap_free(KHEAP_TEMP
, memp
, size
); 
11708 fsctl(proc_t p
, struct fsctl_args 
*uap
, __unused 
int32_t *retval
) 
11711         struct nameidata nd
; 
11712         uint32_t nameiflags
; 
11714         vfs_context_t ctx 
= vfs_context_current(); 
11716         AUDIT_ARG(cmd
, (int)uap
->cmd
); 
11717         AUDIT_ARG(value32
, uap
->options
); 
11718         /* Get the vnode for the file we are getting info on:  */ 
11721         // if we come through fsctl() then the file is by definition not open. 
11722         // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error 
11723         // lest the caller mistakenly thinks the only open is their own (but in 
11724         // reality it's someone else's). 
11726         if (uap
->cmd 
== FSIOC_FD_ONLY_OPEN_ONCE
) { 
11729         if ((uap
->options 
& FSOPT_NOFOLLOW
) == 0) { 
11730                 nameiflags 
|= FOLLOW
; 
11732         if (uap
->cmd 
== FSIOC_FIRMLINK_CTL
) { 
11733                 nameiflags 
|= (CN_FIRMLINK_NOFOLLOW 
| NOCACHE
); 
11735         NDINIT(&nd
, LOOKUP
, OP_FSCTL
, nameiflags 
| AUDITVNPATH1
, 
11736             UIO_USERSPACE
, uap
->path
, ctx
); 
11737         if ((error 
= namei(&nd
))) { 
11744         error 
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
); 
11750         error 
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
); 
11760 ffsctl(proc_t p
, struct ffsctl_args 
*uap
, __unused 
int32_t *retval
) 
11764         vfs_context_t ctx 
= vfs_context_current(); 
11767         AUDIT_ARG(fd
, uap
->fd
); 
11768         AUDIT_ARG(cmd
, (int)uap
->cmd
); 
11769         AUDIT_ARG(value32
, uap
->options
); 
11771         /* Get the vnode for the file we are getting info on:  */ 
11772         if ((error 
= file_vnode(uap
->fd
, &vp
))) { 
11776         if ((error 
= vnode_getwithref(vp
))) { 
11782         if ((error 
= mac_mount_check_fsctl(ctx
, vnode_mount(vp
), uap
->cmd
))) { 
11789         error 
= fsctl_internal(p
, &vp
, uap
->cmd
, (user_addr_t
)uap
->data
, uap
->options
, ctx
); 
11793         /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/ 
11800 /* end of fsctl system call */ 
11802 #define FILESEC_ACCESS_ENTITLEMENT              \ 
11803         "com.apple.private.vfs.filesec-access" 
11806 xattr_entitlement_check(const char *attrname
, vfs_context_t ctx
, bool setting
) 
11808         if (strcmp(attrname
, KAUTH_FILESEC_XATTR
) == 0) { 
11810                  * get: root and tasks with FILESEC_ACCESS_ENTITLEMENT. 
11811                  * set: only tasks with FILESEC_ACCESS_ENTITLEMENT. 
11813                 if ((!setting 
&& vfs_context_issuser(ctx
)) || 
11814                     IOTaskHasEntitlement(current_task(), 
11815                     FILESEC_ACCESS_ENTITLEMENT
)) { 
11824  *  Retrieve the data of an extended attribute. 
11827 getxattr(proc_t p
, struct getxattr_args 
*uap
, user_ssize_t 
*retval
) 
11830         struct nameidata nd
; 
11831         char attrname
[XATTR_MAXNAMELEN 
+ 1]; 
11832         vfs_context_t ctx 
= vfs_context_current(); 
11834         int spacetype 
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
11835         size_t attrsize 
= 0; 
11837         u_int32_t nameiflags
; 
11839         char uio_buf
[UIO_SIZEOF(1)]; 
11841         if (uap
->options 
& (XATTR_NOSECURITY 
| XATTR_NODEFAULT
)) { 
11845         nameiflags 
= (uap
->options 
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
; 
11846         NDINIT(&nd
, LOOKUP
, OP_GETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
); 
11847         if ((error 
= namei(&nd
))) { 
11853         error 
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
); 
11857         if (xattr_protected(attrname
) && 
11858             (error 
= xattr_entitlement_check(attrname
, ctx
, false)) != 0) { 
11862          * the specific check for 0xffffffff is a hack to preserve 
11863          * binary compatibility in K64 with applications that discovered 
11864          * that passing in a buf pointer and a size of -1 resulted in 
11865          * just the size of the indicated extended attribute being returned. 
11866          * this isn't part of the documented behavior, but because of the 
11867          * original implementation's check for "uap->size > 0", this behavior 
11868          * was allowed. In K32 that check turned into a signed comparison 
11869          * even though uap->size is unsigned...  in K64, we blow by that 
11870          * check because uap->size is unsigned and doesn't get sign smeared 
11871          * in the munger for a 32 bit user app.  we also need to add a 
11872          * check to limit the maximum size of the buffer being passed in... 
11873          * unfortunately, the underlying filesystems seem to just malloc 
11874          * the requested size even if the actual extended attribute is tiny. 
11875          * because that malloc is for kernel wired memory, we have to put a 
11876          * sane limit on it. 
11878          * U32 running on K64 will yield 0x00000000ffffffff for uap->size 
11879          * U64 running on K64 will yield -1 (64 bits wide) 
11880          * U32/U64 running on K32 will yield -1 (32 bits wide) 
11882         if (uap
->size 
== 0xffffffff || uap
->size 
== (size_t)-1) { 
11887                 if (uap
->size 
> (size_t)XATTR_MAXSIZE
) { 
11888                         uap
->size 
= XATTR_MAXSIZE
; 
11891                 auio 
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
, 
11892                     &uio_buf
[0], sizeof(uio_buf
)); 
11893                 uio_addiov(auio
, uap
->value
, uap
->size
); 
11896         error 
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, ctx
); 
11901                 *retval 
= uap
->size 
- uio_resid(auio
); 
11903                 *retval 
= (user_ssize_t
)attrsize
; 
11910  * Retrieve the data of an extended attribute. 
11913 fgetxattr(proc_t p
, struct fgetxattr_args 
*uap
, user_ssize_t 
*retval
) 
11916         char attrname
[XATTR_MAXNAMELEN 
+ 1]; 
11917         vfs_context_t ctx 
= vfs_context_current(); 
11919         int spacetype 
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
11920         size_t attrsize 
= 0; 
11923         char uio_buf
[UIO_SIZEOF(1)]; 
11925         if (uap
->options 
& (XATTR_NOFOLLOW 
| XATTR_NOSECURITY 
| XATTR_NODEFAULT
)) { 
11929         if ((error 
= file_vnode(uap
->fd
, &vp
))) { 
11932         if ((error 
= vnode_getwithref(vp
))) { 
11933                 file_drop(uap
->fd
); 
11936         error 
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
); 
11940         if (xattr_protected(attrname
) && 
11941             (error 
= xattr_entitlement_check(attrname
, ctx
, false)) != 0) { 
11944         if (uap
->value 
&& uap
->size 
> 0) { 
11945                 auio 
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_READ
, 
11946                     &uio_buf
[0], sizeof(uio_buf
)); 
11947                 uio_addiov(auio
, uap
->value
, uap
->size
); 
11950         error 
= vn_getxattr(vp
, attrname
, auio
, &attrsize
, uap
->options
, vfs_context_current()); 
11952         (void)vnode_put(vp
); 
11953         file_drop(uap
->fd
); 
11956                 *retval 
= uap
->size 
- uio_resid(auio
); 
11958                 *retval 
= (user_ssize_t
)attrsize
; 
11964  * Set the data of an extended attribute. 
11967 setxattr(proc_t p
, struct setxattr_args 
*uap
, int *retval
) 
11970         struct nameidata nd
; 
11971         char attrname
[XATTR_MAXNAMELEN 
+ 1]; 
11972         vfs_context_t ctx 
= vfs_context_current(); 
11974         int spacetype 
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
11976         u_int32_t nameiflags
; 
11978         char uio_buf
[UIO_SIZEOF(1)]; 
11980         if (uap
->options 
& (XATTR_NOSECURITY 
| XATTR_NODEFAULT
)) { 
11984         error 
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
); 
11986                 if (error 
== EPERM
) { 
11987                         /* if the string won't fit in attrname, copyinstr emits EPERM */ 
11988                         return ENAMETOOLONG
; 
11990                 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */ 
11993         if (xattr_protected(attrname
) && 
11994             (error 
= xattr_entitlement_check(attrname
, ctx
, true)) != 0) { 
11997         if (uap
->size 
!= 0 && uap
->value 
== 0) { 
12000         if (uap
->size 
> INT_MAX
) { 
12004         nameiflags 
= (uap
->options 
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
; 
12005         NDINIT(&nd
, LOOKUP
, OP_SETXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
); 
12006         if ((error 
= namei(&nd
))) { 
12012         auio 
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
, 
12013             &uio_buf
[0], sizeof(uio_buf
)); 
12014         uio_addiov(auio
, uap
->value
, uap
->size
); 
12016         error 
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, ctx
); 
12019                 add_fsevent(FSE_XATTR_MODIFIED
, ctx
, 
12030  * Set the data of an extended attribute. 
12033 fsetxattr(proc_t p
, struct fsetxattr_args 
*uap
, int *retval
) 
12036         char attrname
[XATTR_MAXNAMELEN 
+ 1]; 
12037         vfs_context_t ctx 
= vfs_context_current(); 
12039         int spacetype 
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
12042         char uio_buf
[UIO_SIZEOF(1)]; 
12044         if (uap
->options 
& (XATTR_NOFOLLOW 
| XATTR_NOSECURITY 
| XATTR_NODEFAULT
)) { 
12048         error 
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
); 
12050                 if (error 
== EPERM
) { 
12051                         /* if the string won't fit in attrname, copyinstr emits EPERM */ 
12052                         return ENAMETOOLONG
; 
12054                 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */ 
12057         if (xattr_protected(attrname
) && 
12058             (error 
= xattr_entitlement_check(attrname
, ctx
, true)) != 0) { 
12061         if (uap
->size 
!= 0 && uap
->value 
== 0) { 
12064         if (uap
->size 
> INT_MAX
) { 
12067         if ((error 
= file_vnode(uap
->fd
, &vp
))) { 
12070         if ((error 
= vnode_getwithref(vp
))) { 
12071                 file_drop(uap
->fd
); 
12074         auio 
= uio_createwithbuffer(1, uap
->position
, spacetype
, UIO_WRITE
, 
12075             &uio_buf
[0], sizeof(uio_buf
)); 
12076         uio_addiov(auio
, uap
->value
, uap
->size
); 
12078         error 
= vn_setxattr(vp
, attrname
, auio
, uap
->options
, vfs_context_current()); 
12081                 add_fsevent(FSE_XATTR_MODIFIED
, ctx
, 
12087         file_drop(uap
->fd
); 
12093  * Remove an extended attribute. 
12094  * XXX Code duplication here. 
12097 removexattr(proc_t p
, struct removexattr_args 
*uap
, int *retval
) 
12100         struct nameidata nd
; 
12101         char attrname
[XATTR_MAXNAMELEN 
+ 1]; 
12102         int spacetype 
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
12103         vfs_context_t ctx 
= vfs_context_current(); 
12105         u_int32_t nameiflags
; 
12108         if (uap
->options 
& (XATTR_NOSECURITY 
| XATTR_NODEFAULT
)) { 
12112         error 
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
); 
12116         if (xattr_protected(attrname
)) { 
12119         nameiflags 
= (uap
->options 
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
; 
12120         NDINIT(&nd
, LOOKUP
, OP_REMOVEXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
); 
12121         if ((error 
= namei(&nd
))) { 
12127         error 
= vn_removexattr(vp
, attrname
, uap
->options
, ctx
); 
12130                 add_fsevent(FSE_XATTR_REMOVED
, ctx
, 
12141  * Remove an extended attribute. 
12142  * XXX Code duplication here. 
12145 fremovexattr(__unused proc_t p
, struct fremovexattr_args 
*uap
, int *retval
) 
12148         char attrname
[XATTR_MAXNAMELEN 
+ 1]; 
12152         vfs_context_t ctx 
= vfs_context_current(); 
12155         if (uap
->options 
& (XATTR_NOFOLLOW 
| XATTR_NOSECURITY 
| XATTR_NODEFAULT
)) { 
12159         error 
= copyinstr(uap
->attrname
, attrname
, sizeof(attrname
), &namelen
); 
12163         if (xattr_protected(attrname
)) { 
12166         if ((error 
= file_vnode(uap
->fd
, &vp
))) { 
12169         if ((error 
= vnode_getwithref(vp
))) { 
12170                 file_drop(uap
->fd
); 
12174         error 
= vn_removexattr(vp
, attrname
, uap
->options
, vfs_context_current()); 
12177                 add_fsevent(FSE_XATTR_REMOVED
, ctx
, 
12183         file_drop(uap
->fd
); 
12189  * Retrieve the list of extended attribute names. 
12190  * XXX Code duplication here. 
12193 listxattr(proc_t p
, struct listxattr_args 
*uap
, user_ssize_t 
*retval
) 
12196         struct nameidata nd
; 
12197         vfs_context_t ctx 
= vfs_context_current(); 
12199         int spacetype 
= IS_64BIT_PROCESS(p
) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
12200         size_t attrsize 
= 0; 
12201         u_int32_t nameiflags
; 
12203         char uio_buf
[UIO_SIZEOF(1)]; 
12205         if (uap
->options 
& (XATTR_NOSECURITY 
| XATTR_NODEFAULT
)) { 
12209         nameiflags 
= (uap
->options 
& XATTR_NOFOLLOW
) ? 0 : FOLLOW
; 
12210         NDINIT(&nd
, LOOKUP
, OP_LISTXATTR
, nameiflags
, spacetype
, uap
->path
, ctx
); 
12211         if ((error 
= namei(&nd
))) { 
12216         if (uap
->namebuf 
!= 0 && uap
->bufsize 
> 0) { 
12217                 auio 
= uio_createwithbuffer(1, 0, spacetype
, UIO_READ
, 
12218                     &uio_buf
[0], sizeof(uio_buf
)); 
12219                 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
); 
12222         error 
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, ctx
); 
12226                 *retval 
= (user_ssize_t
)uap
->bufsize 
- uio_resid(auio
); 
12228                 *retval 
= (user_ssize_t
)attrsize
; 
12234  * Retrieve the list of extended attribute names. 
12235  * XXX Code duplication here. 
12238 flistxattr(proc_t p
, struct flistxattr_args 
*uap
, user_ssize_t 
*retval
) 
12242         int spacetype 
= proc_is64bit(p
) ? UIO_USERSPACE64 
: UIO_USERSPACE32
; 
12243         size_t attrsize 
= 0; 
12245         char uio_buf
[UIO_SIZEOF(1)]; 
12247         if (uap
->options 
& (XATTR_NOFOLLOW 
| XATTR_NOSECURITY 
| XATTR_NODEFAULT
)) { 
12251         if ((error 
= file_vnode(uap
->fd
, &vp
))) { 
12254         if ((error 
= vnode_getwithref(vp
))) { 
12255                 file_drop(uap
->fd
); 
12258         if (uap
->namebuf 
!= 0 && uap
->bufsize 
> 0) { 
12259                 auio 
= uio_createwithbuffer(1, 0, spacetype
, 
12260                     UIO_READ
, &uio_buf
[0], sizeof(uio_buf
)); 
12261                 uio_addiov(auio
, uap
->namebuf
, uap
->bufsize
); 
12264         error 
= vn_listxattr(vp
, auio
, &attrsize
, uap
->options
, vfs_context_current()); 
12267         file_drop(uap
->fd
); 
12269                 *retval 
= (user_ssize_t
)uap
->bufsize 
- uio_resid(auio
); 
12271                 *retval 
= (user_ssize_t
)attrsize
; 
12277 fsgetpath_internal(vfs_context_t ctx
, int volfs_id
, uint64_t objid
, 
12278     vm_size_t bufsize
, caddr_t buf
, uint32_t options
, int *pathlen
) 
12281         struct mount 
*mp 
= NULL
; 
12285         /* maximum number of times to retry build_path */ 
12286         unsigned int retries 
= 0x10; 
12288         if (bufsize 
> PAGE_SIZE
) { 
12297         if ((mp 
= mount_lookupby_volfsid(volfs_id
, 1)) == NULL
) { 
12298                 error 
= ENOTSUP
;  /* unexpected failure */ 
12304                 struct vfs_attr vfsattr
; 
12305                 int use_vfs_root 
= TRUE
; 
12307                 VFSATTR_INIT(&vfsattr
); 
12308                 VFSATTR_WANTED(&vfsattr
, f_capabilities
); 
12309                 if (!(options 
& FSOPT_ISREALFSID
) && 
12310                     vfs_getattr(mp
, &vfsattr
, vfs_context_kernel()) == 0 && 
12311                     VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) { 
12312                         if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
) && 
12313                             (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_VOL_GROUPS
)) { 
12314                                 use_vfs_root 
= FALSE
; 
12318                 if (use_vfs_root
) { 
12319                         error 
= VFS_ROOT(mp
, &vp
, ctx
); 
12321                         error 
= VFS_VGET(mp
, objid
, &vp
, ctx
); 
12324                 error 
= VFS_VGET(mp
, (ino64_t
)objid
, &vp
, ctx
); 
12327         if (error 
== ENOENT 
&& (mp
->mnt_flag 
& MNT_UNION
)) { 
12329                  * If the fileid isn't found and we're in a union 
12330                  * mount volume, then see if the fileid is in the 
12331                  * mounted-on volume. 
12333                 struct mount 
*tmp 
= mp
; 
12334                 mp 
= vnode_mount(tmp
->mnt_vnodecovered
); 
12336                 if (vfs_busy(mp
, LK_NOWAIT
) == 0) { 
12348         error 
= mac_vnode_check_fsgetpath(ctx
, vp
); 
12355         /* Obtain the absolute path to this vnode. */ 
12356         bpflags 
= vfs_context_suser(ctx
) ? BUILDPATH_CHECKACCESS 
: 0; 
12357         if (options 
& FSOPT_NOFIRMLINKPATH
) { 
12358                 bpflags 
|= BUILDPATH_NO_FIRMLINK
; 
12360         bpflags 
|= BUILDPATH_CHECK_MOVED
; 
12361         error 
= build_path(vp
, buf
, (int)bufsize
, &length
, bpflags
, ctx
); 
12365                 /* there was a race building the path, try a few more times */ 
12366                 if (error 
== EAGAIN
) { 
12377         AUDIT_ARG(text
, buf
); 
12379         if (kdebug_debugid_enabled(VFS_LOOKUP
) && length 
> 0) { 
12380                 unsigned long path_words
[NUMPARMS
]; 
12381                 size_t path_len 
= sizeof(path_words
); 
12383                 if ((size_t)length 
< path_len
) { 
12384                         memcpy((char *)path_words
, buf
, length
); 
12385                         memset((char *)path_words 
+ length
, 0, path_len 
- length
); 
12389                         memcpy((char *)path_words
, buf 
+ (length 
- path_len
), path_len
); 
12392                 kdebug_vfs_lookup(path_words
, (int)path_len
, vp
, 
12393                     KDBG_VFS_LOOKUP_FLAG_LOOKUP
); 
12396         *pathlen 
= length
; /* may be superseded by error */ 
12403  * Obtain the full pathname of a file system object by id. 
12406 fsgetpath_extended(user_addr_t buf
, user_size_t bufsize
, user_addr_t user_fsid
, uint64_t objid
, 
12407     uint32_t options
, user_ssize_t 
*retval
) 
12409         vfs_context_t ctx 
= vfs_context_current(); 
12415         if (options 
& ~(FSOPT_NOFIRMLINKPATH 
| FSOPT_ISREALFSID
)) { 
12419         if ((error 
= copyin(user_fsid
, (caddr_t
)&fsid
, sizeof(fsid
)))) { 
12422         AUDIT_ARG(value32
, fsid
.val
[0]); 
12423         AUDIT_ARG(value64
, objid
); 
12424         /* Restrict output buffer size for now. */ 
12426         if (bufsize 
> PAGE_SIZE 
|| bufsize 
<= 0) { 
12429         realpath 
= kheap_alloc(KHEAP_TEMP
, bufsize
, Z_WAITOK 
| Z_ZERO
); 
12430         if (realpath 
== NULL
) { 
12434         error 
= fsgetpath_internal(ctx
, fsid
.val
[0], objid
, bufsize
, realpath
, 
12441         error 
= copyout((caddr_t
)realpath
, buf
, length
); 
12443         *retval 
= (user_ssize_t
)length
; /* may be superseded by error */ 
12445         kheap_free(KHEAP_TEMP
, realpath
, bufsize
); 
12450 fsgetpath(__unused proc_t p
, struct fsgetpath_args 
*uap
, user_ssize_t 
*retval
) 
12452         return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
, 
12457 fsgetpath_ext(__unused proc_t p
, struct fsgetpath_ext_args 
*uap
, user_ssize_t 
*retval
) 
12459         return fsgetpath_extended(uap
->buf
, uap
->bufsize
, uap
->fsid
, uap
->objid
, 
12460                    uap
->options
, retval
); 
12464  * Common routine to handle various flavors of statfs data heading out 
12467  * Returns:     0                       Success 
12471 munge_statfs(struct mount 
*mp
, struct vfsstatfs 
*sfsp
, 
12472     user_addr_t bufp
, int *sizep
, boolean_t is_64_bit
, 
12473     boolean_t partial_copy
) 
12476         int             my_size
, copy_size
; 
12479                 struct user64_statfs sfs
; 
12480                 my_size 
= copy_size 
= sizeof(sfs
); 
12481                 bzero(&sfs
, my_size
); 
12482                 sfs
.f_flags 
= mp
->mnt_flag 
& MNT_VISFLAGMASK
; 
12483                 sfs
.f_type 
= (short)mp
->mnt_vtable
->vfc_typenum
; 
12484                 sfs
.f_reserved1 
= (short)sfsp
->f_fssubtype
; 
12485                 sfs
.f_bsize 
= (user64_long_t
)sfsp
->f_bsize
; 
12486                 sfs
.f_iosize 
= (user64_long_t
)sfsp
->f_iosize
; 
12487                 sfs
.f_blocks 
= (user64_long_t
)sfsp
->f_blocks
; 
12488                 sfs
.f_bfree 
= (user64_long_t
)sfsp
->f_bfree
; 
12489                 sfs
.f_bavail 
= (user64_long_t
)sfsp
->f_bavail
; 
12490                 sfs
.f_files 
= (user64_long_t
)sfsp
->f_files
; 
12491                 sfs
.f_ffree 
= (user64_long_t
)sfsp
->f_ffree
; 
12492                 sfs
.f_fsid 
= sfsp
->f_fsid
; 
12493                 sfs
.f_owner 
= sfsp
->f_owner
; 
12494                 if (mp
->mnt_kern_flag 
& MNTK_TYPENAME_OVERRIDE
) { 
12495                         strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
); 
12497                         strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
); 
12499                 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
); 
12500                 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
); 
12502                 if (partial_copy
) { 
12503                         copy_size 
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
)); 
12505                 error 
= copyout((caddr_t
)&sfs
, bufp
, copy_size
); 
12507                 struct user32_statfs sfs
; 
12509                 my_size 
= copy_size 
= sizeof(sfs
); 
12510                 bzero(&sfs
, my_size
); 
12512                 sfs
.f_flags 
= mp
->mnt_flag 
& MNT_VISFLAGMASK
; 
12513                 sfs
.f_type 
= (short)mp
->mnt_vtable
->vfc_typenum
; 
12514                 sfs
.f_reserved1 
= (short)sfsp
->f_fssubtype
; 
12517                  * It's possible for there to be more than 2^31 blocks in the filesystem, so we 
12518                  * have to fudge the numbers here in that case.   We inflate the blocksize in order 
12519                  * to reflect the filesystem size as best we can. 
12521                 if ((sfsp
->f_blocks 
> INT_MAX
) 
12522                     /* Hack for 4061702 . I think the real fix is for Carbon to 
12523                      * look for some volume capability and not depend on hidden 
12524                      * semantics agreed between a FS and carbon. 
12525                      * f_blocks, f_bfree, and f_bavail set to -1 is the trigger 
12526                      * for Carbon to set bNoVolumeSizes volume attribute. 
12527                      * Without this the webdavfs files cannot be copied onto 
12528                      * disk as they look huge. This change should not affect 
12529                      * XSAN as they should not be setting these to -1. 
12531                     && (sfsp
->f_blocks 
!= 0xffffffffffffffffULL
) 
12532                     && (sfsp
->f_bfree 
!= 0xffffffffffffffffULL
) 
12533                     && (sfsp
->f_bavail 
!= 0xffffffffffffffffULL
)) { 
12537                          * Work out how far we have to shift the block count down to make it fit. 
12538                          * Note that it's possible to have to shift so far that the resulting 
12539                          * blocksize would be unreportably large.  At that point, we will clip 
12540                          * any values that don't fit. 
12542                          * For safety's sake, we also ensure that f_iosize is never reported as 
12543                          * being smaller than f_bsize. 
12545                         for (shift 
= 0; shift 
< 32; shift
++) { 
12546                                 if ((sfsp
->f_blocks 
>> shift
) <= INT_MAX
) { 
12549                                 if ((sfsp
->f_bsize 
<< (shift 
+ 1)) > INT_MAX
) { 
12553 #define __SHIFT_OR_CLIP(x, s)   ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s))) 
12554                         sfs
.f_blocks 
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_blocks
, shift
); 
12555                         sfs
.f_bfree 
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bfree
, shift
); 
12556                         sfs
.f_bavail 
= (user32_long_t
)__SHIFT_OR_CLIP(sfsp
->f_bavail
, shift
); 
12557 #undef __SHIFT_OR_CLIP 
12558                         sfs
.f_bsize 
= (user32_long_t
)(sfsp
->f_bsize 
<< shift
); 
12559                         sfs
.f_iosize 
= (int)lmax(sfsp
->f_iosize
, sfsp
->f_bsize
); 
12561                         /* filesystem is small enough to be reported honestly */ 
12562                         sfs
.f_bsize 
= (user32_long_t
)sfsp
->f_bsize
; 
12563                         sfs
.f_iosize 
= (user32_long_t
)sfsp
->f_iosize
; 
12564                         sfs
.f_blocks 
= (user32_long_t
)sfsp
->f_blocks
; 
12565                         sfs
.f_bfree 
= (user32_long_t
)sfsp
->f_bfree
; 
12566                         sfs
.f_bavail 
= (user32_long_t
)sfsp
->f_bavail
; 
12568                 sfs
.f_files 
= (user32_long_t
)sfsp
->f_files
; 
12569                 sfs
.f_ffree 
= (user32_long_t
)sfsp
->f_ffree
; 
12570                 sfs
.f_fsid 
= sfsp
->f_fsid
; 
12571                 sfs
.f_owner 
= sfsp
->f_owner
; 
12572                 if (mp
->mnt_kern_flag 
& MNTK_TYPENAME_OVERRIDE
) { 
12573                         strlcpy(&sfs
.f_fstypename
[0], &mp
->fstypename_override
[0], MFSNAMELEN
); 
12575                         strlcpy(&sfs
.f_fstypename
[0], &sfsp
->f_fstypename
[0], MFSNAMELEN
); 
12577                 strlcpy(&sfs
.f_mntonname
[0], &sfsp
->f_mntonname
[0], MNAMELEN
); 
12578                 strlcpy(&sfs
.f_mntfromname
[0], &sfsp
->f_mntfromname
[0], MNAMELEN
); 
12580                 if (partial_copy
) { 
12581                         copy_size 
-= (sizeof(sfs
.f_reserved3
) + sizeof(sfs
.f_reserved4
)); 
12583                 error 
= copyout((caddr_t
)&sfs
, bufp
, copy_size
); 
12586         if (sizep 
!= NULL
) { 
12593  * copy stat structure into user_stat structure. 
12596 munge_user64_stat(struct stat 
*sbp
, struct user64_stat 
*usbp
) 
12598         bzero(usbp
, sizeof(*usbp
)); 
12600         usbp
->st_dev 
= sbp
->st_dev
; 
12601         usbp
->st_ino 
= sbp
->st_ino
; 
12602         usbp
->st_mode 
= sbp
->st_mode
; 
12603         usbp
->st_nlink 
= sbp
->st_nlink
; 
12604         usbp
->st_uid 
= sbp
->st_uid
; 
12605         usbp
->st_gid 
= sbp
->st_gid
; 
12606         usbp
->st_rdev 
= sbp
->st_rdev
; 
12607 #ifndef _POSIX_C_SOURCE 
12608         usbp
->st_atimespec
.tv_sec 
= sbp
->st_atimespec
.tv_sec
; 
12609         usbp
->st_atimespec
.tv_nsec 
= sbp
->st_atimespec
.tv_nsec
; 
12610         usbp
->st_mtimespec
.tv_sec 
= sbp
->st_mtimespec
.tv_sec
; 
12611         usbp
->st_mtimespec
.tv_nsec 
= sbp
->st_mtimespec
.tv_nsec
; 
12612         usbp
->st_ctimespec
.tv_sec 
= sbp
->st_ctimespec
.tv_sec
; 
12613         usbp
->st_ctimespec
.tv_nsec 
= sbp
->st_ctimespec
.tv_nsec
; 
12615         usbp
->st_atime 
= sbp
->st_atime
; 
12616         usbp
->st_atimensec 
= sbp
->st_atimensec
; 
12617         usbp
->st_mtime 
= sbp
->st_mtime
; 
12618         usbp
->st_mtimensec 
= sbp
->st_mtimensec
; 
12619         usbp
->st_ctime 
= sbp
->st_ctime
; 
12620         usbp
->st_ctimensec 
= sbp
->st_ctimensec
; 
12622         usbp
->st_size 
= sbp
->st_size
; 
12623         usbp
->st_blocks 
= sbp
->st_blocks
; 
12624         usbp
->st_blksize 
= sbp
->st_blksize
; 
12625         usbp
->st_flags 
= sbp
->st_flags
; 
12626         usbp
->st_gen 
= sbp
->st_gen
; 
12627         usbp
->st_lspare 
= sbp
->st_lspare
; 
12628         usbp
->st_qspare
[0] = sbp
->st_qspare
[0]; 
12629         usbp
->st_qspare
[1] = sbp
->st_qspare
[1]; 
12633 munge_user32_stat(struct stat 
*sbp
, struct user32_stat 
*usbp
) 
12635         bzero(usbp
, sizeof(*usbp
)); 
12637         usbp
->st_dev 
= sbp
->st_dev
; 
12638         usbp
->st_ino 
= sbp
->st_ino
; 
12639         usbp
->st_mode 
= sbp
->st_mode
; 
12640         usbp
->st_nlink 
= sbp
->st_nlink
; 
12641         usbp
->st_uid 
= sbp
->st_uid
; 
12642         usbp
->st_gid 
= sbp
->st_gid
; 
12643         usbp
->st_rdev 
= sbp
->st_rdev
; 
12644 #ifndef _POSIX_C_SOURCE 
12645         usbp
->st_atimespec
.tv_sec 
= (user32_time_t
)sbp
->st_atimespec
.tv_sec
; 
12646         usbp
->st_atimespec
.tv_nsec 
= (user32_long_t
)sbp
->st_atimespec
.tv_nsec
; 
12647         usbp
->st_mtimespec
.tv_sec 
= (user32_time_t
)sbp
->st_mtimespec
.tv_sec
; 
12648         usbp
->st_mtimespec
.tv_nsec 
= (user32_long_t
)sbp
->st_mtimespec
.tv_nsec
; 
12649         usbp
->st_ctimespec
.tv_sec 
= (user32_time_t
)sbp
->st_ctimespec
.tv_sec
; 
12650         usbp
->st_ctimespec
.tv_nsec 
= (user32_long_t
)sbp
->st_ctimespec
.tv_nsec
; 
12652         usbp
->st_atime 
= sbp
->st_atime
; 
12653         usbp
->st_atimensec 
= sbp
->st_atimensec
; 
12654         usbp
->st_mtime 
= sbp
->st_mtime
; 
12655         usbp
->st_mtimensec 
= sbp
->st_mtimensec
; 
12656         usbp
->st_ctime 
= sbp
->st_ctime
; 
12657         usbp
->st_ctimensec 
= sbp
->st_ctimensec
; 
12659         usbp
->st_size 
= sbp
->st_size
; 
12660         usbp
->st_blocks 
= sbp
->st_blocks
; 
12661         usbp
->st_blksize 
= sbp
->st_blksize
; 
12662         usbp
->st_flags 
= sbp
->st_flags
; 
12663         usbp
->st_gen 
= sbp
->st_gen
; 
12664         usbp
->st_lspare 
= sbp
->st_lspare
; 
12665         usbp
->st_qspare
[0] = sbp
->st_qspare
[0]; 
12666         usbp
->st_qspare
[1] = sbp
->st_qspare
[1]; 
12670  * copy stat64 structure into user_stat64 structure. 
12673 munge_user64_stat64(struct stat64 
*sbp
, struct user64_stat64 
*usbp
) 
12675         bzero(usbp
, sizeof(*usbp
)); 
12677         usbp
->st_dev 
= sbp
->st_dev
; 
12678         usbp
->st_ino 
= sbp
->st_ino
; 
12679         usbp
->st_mode 
= sbp
->st_mode
; 
12680         usbp
->st_nlink 
= sbp
->st_nlink
; 
12681         usbp
->st_uid 
= sbp
->st_uid
; 
12682         usbp
->st_gid 
= sbp
->st_gid
; 
12683         usbp
->st_rdev 
= sbp
->st_rdev
; 
12684 #ifndef _POSIX_C_SOURCE 
12685         usbp
->st_atimespec
.tv_sec 
= sbp
->st_atimespec
.tv_sec
; 
12686         usbp
->st_atimespec
.tv_nsec 
= sbp
->st_atimespec
.tv_nsec
; 
12687         usbp
->st_mtimespec
.tv_sec 
= sbp
->st_mtimespec
.tv_sec
; 
12688         usbp
->st_mtimespec
.tv_nsec 
= sbp
->st_mtimespec
.tv_nsec
; 
12689         usbp
->st_ctimespec
.tv_sec 
= sbp
->st_ctimespec
.tv_sec
; 
12690         usbp
->st_ctimespec
.tv_nsec 
= sbp
->st_ctimespec
.tv_nsec
; 
12691         usbp
->st_birthtimespec
.tv_sec 
= sbp
->st_birthtimespec
.tv_sec
; 
12692         usbp
->st_birthtimespec
.tv_nsec 
= sbp
->st_birthtimespec
.tv_nsec
; 
12694         usbp
->st_atime 
= sbp
->st_atime
; 
12695         usbp
->st_atimensec 
= sbp
->st_atimensec
; 
12696         usbp
->st_mtime 
= sbp
->st_mtime
; 
12697         usbp
->st_mtimensec 
= sbp
->st_mtimensec
; 
12698         usbp
->st_ctime 
= sbp
->st_ctime
; 
12699         usbp
->st_ctimensec 
= sbp
->st_ctimensec
; 
12700         usbp
->st_birthtime 
= sbp
->st_birthtime
; 
12701         usbp
->st_birthtimensec 
= sbp
->st_birthtimensec
; 
12703         usbp
->st_size 
= sbp
->st_size
; 
12704         usbp
->st_blocks 
= sbp
->st_blocks
; 
12705         usbp
->st_blksize 
= sbp
->st_blksize
; 
12706         usbp
->st_flags 
= sbp
->st_flags
; 
12707         usbp
->st_gen 
= sbp
->st_gen
; 
12708         usbp
->st_lspare 
= sbp
->st_lspare
; 
12709         usbp
->st_qspare
[0] = sbp
->st_qspare
[0]; 
12710         usbp
->st_qspare
[1] = sbp
->st_qspare
[1]; 
12714 munge_user32_stat64(struct stat64 
*sbp
, struct user32_stat64 
*usbp
) 
12716         bzero(usbp
, sizeof(*usbp
)); 
12718         usbp
->st_dev 
= sbp
->st_dev
; 
12719         usbp
->st_ino 
= sbp
->st_ino
; 
12720         usbp
->st_mode 
= sbp
->st_mode
; 
12721         usbp
->st_nlink 
= sbp
->st_nlink
; 
12722         usbp
->st_uid 
= sbp
->st_uid
; 
12723         usbp
->st_gid 
= sbp
->st_gid
; 
12724         usbp
->st_rdev 
= sbp
->st_rdev
; 
12725 #ifndef _POSIX_C_SOURCE 
12726         usbp
->st_atimespec
.tv_sec 
= (user32_time_t
)sbp
->st_atimespec
.tv_sec
; 
12727         usbp
->st_atimespec
.tv_nsec 
= (user32_long_t
)sbp
->st_atimespec
.tv_nsec
; 
12728         usbp
->st_mtimespec
.tv_sec 
= (user32_time_t
)sbp
->st_mtimespec
.tv_sec
; 
12729         usbp
->st_mtimespec
.tv_nsec 
= (user32_long_t
)sbp
->st_mtimespec
.tv_nsec
; 
12730         usbp
->st_ctimespec
.tv_sec 
= (user32_time_t
)sbp
->st_ctimespec
.tv_sec
; 
12731         usbp
->st_ctimespec
.tv_nsec 
= (user32_long_t
)sbp
->st_ctimespec
.tv_nsec
; 
12732         usbp
->st_birthtimespec
.tv_sec 
= (user32_time_t
)sbp
->st_birthtimespec
.tv_sec
; 
12733         usbp
->st_birthtimespec
.tv_nsec 
= (user32_long_t
)sbp
->st_birthtimespec
.tv_nsec
; 
12735         usbp
->st_atime 
= sbp
->st_atime
; 
12736         usbp
->st_atimensec 
= sbp
->st_atimensec
; 
12737         usbp
->st_mtime 
= sbp
->st_mtime
; 
12738         usbp
->st_mtimensec 
= sbp
->st_mtimensec
; 
12739         usbp
->st_ctime 
= sbp
->st_ctime
; 
12740         usbp
->st_ctimensec 
= sbp
->st_ctimensec
; 
12741         usbp
->st_birthtime 
= sbp
->st_birthtime
; 
12742         usbp
->st_birthtimensec 
= sbp
->st_birthtimensec
; 
12744         usbp
->st_size 
= sbp
->st_size
; 
12745         usbp
->st_blocks 
= sbp
->st_blocks
; 
12746         usbp
->st_blksize 
= sbp
->st_blksize
; 
12747         usbp
->st_flags 
= sbp
->st_flags
; 
12748         usbp
->st_gen 
= sbp
->st_gen
; 
12749         usbp
->st_lspare 
= sbp
->st_lspare
; 
12750         usbp
->st_qspare
[0] = sbp
->st_qspare
[0]; 
12751         usbp
->st_qspare
[1] = sbp
->st_qspare
[1]; 
12755  * Purge buffer cache for simulating cold starts 
12758 vnode_purge_callback(struct vnode 
*vp
, __unused 
void *cargs
) 
12760         ubc_msync(vp
, (off_t
)0, ubc_getsize(vp
), NULL 
/* off_t *resid_off */, UBC_PUSHALL 
| UBC_INVALIDATE
); 
12762         return VNODE_RETURNED
; 
12766 vfs_purge_callback(mount_t mp
, __unused 
void * arg
) 
12768         vnode_iterate(mp
, VNODE_WAIT 
| VNODE_ITERATE_ALL
, vnode_purge_callback
, NULL
); 
12770         return VFS_RETURNED
; 
12774 vfs_purge(__unused 
struct proc 
*p
, __unused 
struct vfs_purge_args 
*uap
, __unused 
int32_t *retval
) 
12776         if (!kauth_cred_issuser(kauth_cred_get())) { 
12780         vfs_iterate(0 /* flags */, vfs_purge_callback
, NULL
); 
12786  * gets the vnode associated with the (unnamed) snapshot directory 
12787  * for a Filesystem. The snapshot directory vnode is returned with 
12788  * an iocount on it. 
12791 vnode_get_snapdir(vnode_t rvp
, vnode_t 
*sdvpp
, vfs_context_t ctx
) 
12793         return VFS_VGET_SNAPDIR(vnode_mount(rvp
), sdvpp
, ctx
); 
12797  * Get the snapshot vnode. 
12799  * If successful, the call returns with an iocount on *rvpp ,*sdvpp and 
12800  * needs nameidone() on ndp. 
12802  * If the snapshot vnode exists it is returned in ndp->ni_vp. 
12804  * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is 
12808 vnode_get_snapshot(int dirfd
, vnode_t 
*rvpp
, vnode_t 
*sdvpp
, 
12809     user_addr_t name
, struct nameidata 
*ndp
, int32_t op
, 
12810 #if !CONFIG_TRIGGERS 
12813     enum path_operation pathop
, 
12819         struct vfs_attr vfa
; 
12824         error 
= vnode_getfromfd(ctx
, dirfd
, rvpp
); 
12829         if (!vnode_isvroot(*rvpp
)) { 
12834         /* Make sure the filesystem supports snapshots */ 
12835         VFSATTR_INIT(&vfa
); 
12836         VFSATTR_WANTED(&vfa
, f_capabilities
); 
12837         if ((vfs_getattr(vnode_mount(*rvpp
), &vfa
, ctx
) != 0) || 
12838             !VFSATTR_IS_SUPPORTED(&vfa
, f_capabilities
) || 
12839             !((vfa
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & 
12840             VOL_CAP_INT_SNAPSHOT
)) || 
12841             !((vfa
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & 
12842             VOL_CAP_INT_SNAPSHOT
))) { 
12847         error 
= vnode_get_snapdir(*rvpp
, sdvpp
, ctx
); 
12852         name_buf 
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
); 
12853         error 
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
); 
12859          * Some sanity checks- name can't be empty, "." or ".." or have slashes. 
12860          * (the length returned by copyinstr includes the terminating NUL) 
12862         if ((name_len 
== 1) || (name_len 
== 2 && name_buf
[0] == '.') || 
12863             (name_len 
== 3 && name_buf
[0] == '.' && name_buf
[1] == '.')) { 
12867         for (i 
= 0; i 
< (int)name_len 
&& name_buf
[i
] != '/'; i
++) { 
12870         if (i 
< (int)name_len
) { 
12876         if (op 
== CREATE
) { 
12877                 error 
= mac_mount_check_snapshot_create(ctx
, vnode_mount(*rvpp
), 
12879         } else if (op 
== DELETE
) { 
12880                 error 
= mac_mount_check_snapshot_delete(ctx
, vnode_mount(*rvpp
), 
12888         /* Check if the snapshot already exists ... */ 
12889         NDINIT(ndp
, op
, pathop
, USEDVP 
| NOCACHE 
| AUDITVNPATH1
, 
12890             UIO_SYSSPACE
, CAST_USER_ADDR_T(name_buf
), ctx
); 
12891         ndp
->ni_dvp 
= *sdvpp
; 
12893         error 
= namei(ndp
); 
12895         zfree(ZV_NAMEI
, name_buf
); 
12911  * create a filesystem snapshot (for supporting filesystems) 
12913  * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL) 
12914  * We get to the (unnamed) snapshot directory vnode and create the vnode 
12915  * for the snapshot in it. 
12919  *    a) Passed in name for snapshot cannot have slashes. 
12920  *    b) name can't be "." or ".." 
12922  * Since this requires superuser privileges, vnode_authorize calls are not 
12925 static int __attribute__((noinline
)) 
12926 snapshot_create(int dirfd
, user_addr_t name
, __unused 
uint32_t flags
, 
12929         vnode_t rvp
, snapdvp
; 
12931         struct nameidata 
*ndp
; 
12933         ndp 
= kheap_alloc(KHEAP_TEMP
, sizeof(*ndp
), Z_WAITOK
); 
12935         error 
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, ndp
, CREATE
, 
12942                 vnode_put(ndp
->ni_vp
); 
12945                 struct vnode_attr 
*vap
; 
12946                 vnode_t vp 
= NULLVP
; 
12948                 vap 
= kheap_alloc(KHEAP_TEMP
, sizeof(*vap
), Z_WAITOK
); 
12951                 VATTR_SET(vap
, va_type
, VREG
); 
12952                 VATTR_SET(vap
, va_mode
, 0); 
12954                 error 
= vn_create(snapdvp
, &vp
, ndp
, vap
, 
12955                     VN_CREATE_NOAUTH  
| VN_CREATE_NOINHERIT
, 0, NULL
, ctx
); 
12956                 if (!error 
&& vp
) { 
12960                 kheap_free(KHEAP_TEMP
, vap
, sizeof(*vap
)); 
12964         vnode_put(snapdvp
); 
12967         kheap_free(KHEAP_TEMP
, ndp
, sizeof(*ndp
)); 
12973  * Delete a Filesystem snapshot 
12975  * get the vnode for the unnamed snapshot directory and the snapshot and 
12976  * delete the snapshot. 
12978 static int __attribute__((noinline
)) 
12979 snapshot_delete(int dirfd
, user_addr_t name
, __unused 
uint32_t flags
, 
12982         vnode_t rvp
, snapdvp
; 
12984         struct nameidata 
*ndp
; 
12986         ndp 
= kheap_alloc(KHEAP_TEMP
, sizeof(*ndp
), Z_WAITOK
); 
12988         error 
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, ndp
, DELETE
, 
12994         error 
= VNOP_REMOVE(snapdvp
, ndp
->ni_vp
, &ndp
->ni_cnd
, 
12995             VNODE_REMOVE_SKIP_NAMESPACE_EVENT
, ctx
); 
12997         vnode_put(ndp
->ni_vp
); 
12999         vnode_put(snapdvp
); 
13002         kheap_free(KHEAP_TEMP
, ndp
, sizeof(*ndp
)); 
13008  * Revert a filesystem to a snapshot 
13010  * Marks the filesystem to revert to the given snapshot on next mount. 
13012 static int __attribute__((noinline
)) 
13013 snapshot_revert(int dirfd
, user_addr_t name
, __unused 
uint32_t flags
, 
13019         struct fs_snapshot_revert_args revert_data
; 
13020         struct componentname cnp
; 
13024         error 
= vnode_getfromfd(ctx
, dirfd
, &rvp
); 
13028         mp 
= vnode_mount(rvp
); 
13030         name_buf 
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
); 
13031         error 
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
); 
13033                 zfree(ZV_NAMEI
, name_buf
); 
13039         error 
= mac_mount_check_snapshot_revert(ctx
, mp
, name_buf
); 
13041                 zfree(ZV_NAMEI
, name_buf
); 
13048          * Grab mount_iterref so that we can release the vnode, 
13049          * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync. 
13051         error 
= mount_iterref(mp
, 0); 
13054                 zfree(ZV_NAMEI
, name_buf
); 
13058         memset(&cnp
, 0, sizeof(cnp
)); 
13059         cnp
.cn_pnbuf 
= (char *)name_buf
; 
13060         cnp
.cn_nameiop 
= LOOKUP
; 
13061         cnp
.cn_flags 
= ISLASTCN 
| HASBUF
; 
13062         cnp
.cn_pnlen 
= MAXPATHLEN
; 
13063         cnp
.cn_nameptr 
= cnp
.cn_pnbuf
; 
13064         cnp
.cn_namelen 
= (int)name_len
; 
13065         revert_data
.sr_cnp 
= &cnp
; 
13067         error 
= VFS_IOCTL(mp
, VFSIOC_REVERT_SNAPSHOT
, (caddr_t
)&revert_data
, 0, ctx
); 
13068         mount_iterdrop(mp
); 
13069         zfree(ZV_NAMEI
, name_buf
); 
13072                 /* If there was any error, try again using VNOP_IOCTL */ 
13075                 struct nameidata namend
; 
13077                 error 
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, &namend
, LOOKUP
, 
13084                 error 
= VNOP_IOCTL(namend
.ni_vp
, APFSIOC_REVERT_TO_SNAPSHOT
, (caddr_t
) NULL
, 
13087                 vnode_put(namend
.ni_vp
); 
13088                 nameidone(&namend
); 
13089                 vnode_put(snapdvp
); 
13097  * rename a Filesystem snapshot 
13099  * get the vnode for the unnamed snapshot directory and the snapshot and 
13100  * rename the snapshot. This is a very specialised (and simple) case of 
13101  * rename(2) (which has to deal with a lot more complications). It differs 
13102  * slightly from rename(2) in that EEXIST is returned if the new name exists. 
13104 static int __attribute__((noinline
)) 
13105 snapshot_rename(int dirfd
, user_addr_t old
, user_addr_t 
new, 
13106     __unused 
uint32_t flags
, vfs_context_t ctx
) 
13108         vnode_t rvp
, snapdvp
; 
13110         caddr_t newname_buf
; 
13113         struct nameidata 
*fromnd
, *tond
; 
13114         /* carving out a chunk for structs that are too big to be on stack. */ 
13116                 struct nameidata from_node
; 
13117                 struct nameidata to_node
; 
13120         __rename_data 
= kheap_alloc(KHEAP_TEMP
, sizeof(*__rename_data
), Z_WAITOK
); 
13121         fromnd 
= &__rename_data
->from_node
; 
13122         tond 
= &__rename_data
->to_node
; 
13124         error 
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, old
, fromnd
, DELETE
, 
13129         fvp  
= fromnd
->ni_vp
; 
13131         newname_buf 
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
); 
13132         error 
= copyinstr(new, newname_buf
, MAXPATHLEN
, &name_len
); 
13138          * Some sanity checks- new name can't be empty, "." or ".." or have 
13140          * (the length returned by copyinstr includes the terminating NUL) 
13142          * The FS rename VNOP is suppossed to handle this but we'll pick it 
13145         if ((name_len 
== 1) || (name_len 
== 2 && newname_buf
[0] == '.') || 
13146             (name_len 
== 3 && newname_buf
[0] == '.' && newname_buf
[1] == '.')) { 
13150         for (i 
= 0; i 
< (int)name_len 
&& newname_buf
[i
] != '/'; i
++) { 
13153         if (i 
< (int)name_len
) { 
13159         error 
= mac_mount_check_snapshot_create(ctx
, vnode_mount(rvp
), 
13166         NDINIT(tond
, RENAME
, OP_RENAME
, USEDVP 
| NOCACHE 
| AUDITVNPATH2
, 
13167             UIO_SYSSPACE
, CAST_USER_ADDR_T(newname_buf
), ctx
); 
13168         tond
->ni_dvp 
= snapdvp
; 
13170         error 
= namei(tond
); 
13173         } else if (tond
->ni_vp
) { 
13175                  * snapshot rename behaves differently than rename(2) - if the 
13176                  * new name exists, EEXIST is returned. 
13178                 vnode_put(tond
->ni_vp
); 
13183         error 
= VNOP_RENAME(snapdvp
, fvp
, &fromnd
->ni_cnd
, snapdvp
, NULLVP
, 
13184             &tond
->ni_cnd
, ctx
); 
13189         zfree(ZV_NAMEI
, newname_buf
); 
13191         vnode_put(snapdvp
); 
13195         kheap_free(KHEAP_TEMP
, __rename_data
, sizeof(*__rename_data
)); 
13200  * Mount a Filesystem snapshot 
13202  * get the vnode for the unnamed snapshot directory and the snapshot and 
13203  * mount the snapshot. 
13205 static int __attribute__((noinline
)) 
13206 snapshot_mount(int dirfd
, user_addr_t name
, user_addr_t directory
, 
13207     __unused user_addr_t mnt_data
, __unused 
uint32_t flags
, vfs_context_t ctx
) 
13210         vnode_t rvp
, snapdvp
, snapvp
, vp
, pvp
; 
13211         struct fs_snapshot_mount_args smnt_data
; 
13213         struct nameidata 
*snapndp
, *dirndp
; 
13214         /* carving out a chunk for structs that are too big to be on stack. */ 
13216                 struct nameidata snapnd
; 
13217                 struct nameidata dirnd
; 
13218         } * __snapshot_mount_data
; 
13220         __snapshot_mount_data 
= kheap_alloc(KHEAP_TEMP
, 
13221             sizeof(*__snapshot_mount_data
), Z_WAITOK
); 
13222         snapndp 
= &__snapshot_mount_data
->snapnd
; 
13223         dirndp 
= &__snapshot_mount_data
->dirnd
; 
13225         error 
= vnode_get_snapshot(dirfd
, &rvp
, &snapdvp
, name
, snapndp
, LOOKUP
, 
13231         snapvp  
= snapndp
->ni_vp
; 
13232         if (!vnode_mount(rvp
) || (vnode_mount(rvp
) == dead_mountp
)) { 
13237         /* Get the vnode to be covered */ 
13238         NDINIT(dirndp
, LOOKUP
, OP_MOUNT
, FOLLOW 
| AUDITVNPATH1 
| WANTPARENT
, 
13239             UIO_USERSPACE
, directory
, ctx
); 
13240         error 
= namei(dirndp
); 
13245         vp 
= dirndp
->ni_vp
; 
13246         pvp 
= dirndp
->ni_dvp
; 
13247         mp 
= vnode_mount(rvp
); 
13249         if ((vp
->v_flag 
& VROOT
) && (vp
->v_mount
->mnt_flag 
& MNT_ROOTFS
)) { 
13255         error 
= mac_mount_check_snapshot_mount(ctx
, rvp
, vp
, &dirndp
->ni_cnd
, snapndp
->ni_cnd
.cn_nameptr
, 
13256             mp
->mnt_vfsstat
.f_fstypename
); 
13262         smnt_data
.sm_mp  
= mp
; 
13263         smnt_data
.sm_cnp 
= &snapndp
->ni_cnd
; 
13264         error 
= mount_common(mp
->mnt_vfsstat
.f_fstypename
, pvp
, vp
, 
13265             &dirndp
->ni_cnd
, CAST_USER_ADDR_T(&smnt_data
), flags 
& MNT_DONTBROWSE
, 
13266             KERNEL_MOUNT_SNAPSHOT
, NULL
, FALSE
, ctx
); 
13274         vnode_put(snapdvp
); 
13276         nameidone(snapndp
); 
13278         kheap_free(KHEAP_TEMP
, __snapshot_mount_data
, 
13279             sizeof(*__snapshot_mount_data
)); 
13284  * Root from a snapshot of the filesystem 
13286  * Marks the filesystem to root from the given snapshot on next boot. 
13288 static int __attribute__((noinline
)) 
13289 snapshot_root(int dirfd
, user_addr_t name
, __unused 
uint32_t flags
, 
13295         struct fs_snapshot_root_args root_data
; 
13296         struct componentname cnp
; 
13300         error 
= vnode_getfromfd(ctx
, dirfd
, &rvp
); 
13304         mp 
= vnode_mount(rvp
); 
13306         name_buf 
= zalloc_flags(ZV_NAMEI
, Z_WAITOK
); 
13307         error 
= copyinstr(name
, name_buf
, MAXPATHLEN
, &name_len
); 
13309                 zfree(ZV_NAMEI
, name_buf
); 
13314         // XXX MAC checks ? 
13317          * Grab mount_iterref so that we can release the vnode, 
13318          * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync. 
13320         error 
= mount_iterref(mp
, 0); 
13323                 zfree(ZV_NAMEI
, name_buf
); 
13327         memset(&cnp
, 0, sizeof(cnp
)); 
13328         cnp
.cn_pnbuf 
= (char *)name_buf
; 
13329         cnp
.cn_nameiop 
= LOOKUP
; 
13330         cnp
.cn_flags 
= ISLASTCN 
| HASBUF
; 
13331         cnp
.cn_pnlen 
= MAXPATHLEN
; 
13332         cnp
.cn_nameptr 
= cnp
.cn_pnbuf
; 
13333         cnp
.cn_namelen 
= (int)name_len
; 
13334         root_data
.sr_cnp 
= &cnp
; 
13336         error 
= VFS_IOCTL(mp
, VFSIOC_ROOT_SNAPSHOT
, (caddr_t
)&root_data
, 0, ctx
); 
13338         mount_iterdrop(mp
); 
13339         zfree(ZV_NAMEI
, name_buf
); 
13345  * FS snapshot operations dispatcher 
13348 fs_snapshot(__unused proc_t p
, struct fs_snapshot_args 
*uap
, 
13349     __unused 
int32_t *retval
) 
13352         vfs_context_t ctx 
= vfs_context_current(); 
13354         AUDIT_ARG(fd
, uap
->dirfd
); 
13355         AUDIT_ARG(value32
, uap
->op
); 
13357         error 
= priv_check_cred(vfs_context_ucred(ctx
), PRIV_VFS_SNAPSHOT
, 0); 
13363          * Enforce user authorization for snapshot modification operations, 
13364          * or if trying to root from snapshot. 
13366         if (uap
->op 
!= SNAPSHOT_OP_MOUNT
) { 
13367                 vnode_t dvp 
= NULLVP
; 
13368                 vnode_t devvp 
= NULLVP
; 
13371                 error 
= vnode_getfromfd(ctx
, uap
->dirfd
, &dvp
); 
13375                 mp 
= vnode_mount(dvp
); 
13376                 devvp 
= mp
->mnt_devvp
; 
13378                 /* get an iocount on devvp */ 
13379                 if (devvp 
== NULLVP
) { 
13380                         error 
= vnode_lookup(mp
->mnt_vfsstat
.f_mntfromname
, 0, &devvp
, ctx
); 
13381                         /* for mounts which arent block devices */ 
13382                         if (error 
== ENOENT
) { 
13386                         error 
= vnode_getwithref(devvp
); 
13394                 if ((vfs_context_issuser(ctx
) == 0) && 
13395                     (vnode_authorize(devvp
, NULL
, KAUTH_VNODE_WRITE_DATA
, ctx
) != 0)) { 
13407         case SNAPSHOT_OP_CREATE
: 
13408                 error 
= snapshot_create(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
); 
13410         case SNAPSHOT_OP_DELETE
: 
13411                 error 
= snapshot_delete(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
); 
13413         case SNAPSHOT_OP_RENAME
: 
13414                 error 
= snapshot_rename(uap
->dirfd
, uap
->name1
, uap
->name2
, 
13417         case SNAPSHOT_OP_MOUNT
: 
13418                 error 
= snapshot_mount(uap
->dirfd
, uap
->name1
, uap
->name2
, 
13419                     uap
->data
, uap
->flags
, ctx
); 
13421         case SNAPSHOT_OP_REVERT
: 
13422                 error 
= snapshot_revert(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
); 
13424 #if CONFIG_MNT_ROOTSNAP 
13425         case SNAPSHOT_OP_ROOT
: 
13426                 error 
= snapshot_root(uap
->dirfd
, uap
->name1
, uap
->flags
, ctx
); 
13428 #endif /* CONFIG_MNT_ROOTSNAP */