2  * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 
  30  * Copyright (c) 1989, 1993 
  31  *      The Regents of the University of California.  All rights reserved. 
  32  * (c) UNIX System Laboratories, Inc. 
  33  * All or some portions of this file are derived from material licensed 
  34  * to the University of California by American Telephone and Telegraph 
  35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with 
  36  * the permission of UNIX System Laboratories, Inc. 
  38  * Redistribution and use in source and binary forms, with or without 
  39  * modification, are permitted provided that the following conditions 
  41  * 1. Redistributions of source code must retain the above copyright 
  42  *    notice, this list of conditions and the following disclaimer. 
  43  * 2. Redistributions in binary form must reproduce the above copyright 
  44  *    notice, this list of conditions and the following disclaimer in the 
  45  *    documentation and/or other materials provided with the distribution. 
  46  * 3. All advertising materials mentioning features or use of this software 
  47  *    must display the following acknowledgement: 
  48  *      This product includes software developed by the University of 
  49  *      California, Berkeley and its contributors. 
  50  * 4. Neither the name of the University nor the names of its contributors 
  51  *    may be used to endorse or promote products derived from this software 
  52  *    without specific prior written permission. 
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  66  *      @(#)vfs_subr.c  8.31 (Berkeley) 5/26/95 
  69  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 
  70  * support for mandatory and extensible security protections.  This notice 
  71  * is included in support of clause 2.2 (b) of the Apple Public License, 
  76  * External virtual filesystem routines 
  80 #include <sys/param.h> 
  81 #include <sys/systm.h> 
  82 #include <sys/proc_internal.h> 
  83 #include <sys/kauth.h> 
  84 #include <sys/mount_internal.h> 
  87 #include <sys/vnode.h> 
  88 #include <sys/vnode_internal.h> 
  90 #include <sys/namei.h> 
  91 #include <sys/ucred.h> 
  92 #include <sys/buf_internal.h> 
  93 #include <sys/errno.h> 
  94 #include <sys/malloc.h> 
  95 #include <sys/uio_internal.h> 
  97 #include <sys/domain.h> 
  99 #include <sys/syslog.h> 
 100 #include <sys/ubc_internal.h> 
 102 #include <sys/sysctl.h> 
 103 #include <sys/filedesc.h> 
 104 #include <sys/event.h> 
 105 #include <sys/kdebug.h> 
 106 #include <sys/kauth.h> 
 107 #include <sys/user.h> 
 108 #include <miscfs/fifofs/fifo.h> 
 111 #include <machine/spl.h> 
 114 #include <kern/assert.h> 
 116 #include <miscfs/specfs/specdev.h> 
 118 #include <mach/mach_types.h> 
 119 #include <mach/memory_object_types.h> 
 121 #include <kern/kalloc.h>        /* kalloc()/kfree() */ 
 122 #include <kern/clock.h>         /* delay_for_interval() */ 
 123 #include <libkern/OSAtomic.h>   /* OSAddAtomic() */ 
 126 #include <vm/vm_protos.h>       /* vnode_pager_vrele() */ 
 129 #include <security/mac_framework.h> 
 132 extern lck_grp_t 
*vnode_lck_grp
; 
 133 extern lck_attr_t 
*vnode_lck_attr
; 
 136 extern lck_mtx_t 
* mnt_list_mtx_lock
; 
 138 enum vtype iftovt_tab
[16] = { 
 139         VNON
, VFIFO
, VCHR
, VNON
, VDIR
, VNON
, VBLK
, VNON
, 
 140         VREG
, VNON
, VLNK
, VNON
, VSOCK
, VNON
, VNON
, VBAD
, 
 142 int     vttoif_tab
[9] = { 
 143         0, S_IFREG
, S_IFDIR
, S_IFBLK
, S_IFCHR
, S_IFLNK
, 
 144         S_IFSOCK
, S_IFIFO
, S_IFMT
, 
 147 /* XXX next prototype should be from <nfs/nfs.h> */ 
 148 extern int       nfs_vinvalbuf(vnode_t
, int, vfs_context_t
, int); 
 150 /* XXX next prototype should be from <libsa/stdlib.h> but conflicts libkern */ 
 151 __private_extern__ 
void qsort( 
 155     int (*)(const void *, const void *)); 
 157 extern kern_return_t 
adjust_vm_object_cache(vm_size_t oval
, vm_size_t nval
); 
 158 __private_extern__ 
void vntblinit(void); 
 159 __private_extern__ kern_return_t 
reset_vmobjectcache(unsigned int val1
, 
 161 __private_extern__ 
int unlink1(vfs_context_t
, struct nameidata 
*, int); 
 163 static void vnode_list_add(vnode_t
); 
 164 static void vnode_list_remove(vnode_t
); 
 165 static void vnode_list_remove_locked(vnode_t
); 
 167 static errno_t 
vnode_drain(vnode_t
); 
 168 static void vgone(vnode_t
, int flags
); 
 169 static void vclean(vnode_t vp
, int flag
); 
 170 static void vnode_reclaim_internal(vnode_t
, int, int, int); 
 172 static void vnode_dropiocount (vnode_t
); 
 173 static errno_t 
vnode_getiocount(vnode_t vp
, int vid
, int vflags
); 
 174 static int vget_internal(vnode_t
, int, int); 
 176 static vnode_t 
checkalias(vnode_t vp
, dev_t nvp_rdev
); 
 177 static int  vnode_reload(vnode_t
); 
 178 static int  vnode_isinuse_locked(vnode_t
, int, int); 
 180 static void insmntque(vnode_t vp
, mount_t mp
); 
 181 static int mount_getvfscnt(void); 
 182 static int mount_fillfsids(fsid_t 
*, int ); 
 183 static void vnode_iterate_setup(mount_t
); 
 184 static int vnode_umount_preflight(mount_t
, vnode_t
, int); 
 185 static int vnode_iterate_prepare(mount_t
); 
 186 static int vnode_iterate_reloadq(mount_t
); 
 187 static void vnode_iterate_clear(mount_t
); 
 189 errno_t 
rmdir_remove_orphaned_appleDouble(vnode_t
, vfs_context_t
, int *);  
 191 TAILQ_HEAD(freelst
, vnode
) vnode_free_list
;     /* vnode free list */ 
 192 TAILQ_HEAD(deadlst
, vnode
) vnode_dead_list
;     /* vnode dead list */ 
 194 TAILQ_HEAD(ragelst
, vnode
) vnode_rage_list
;     /* vnode rapid age list */ 
 195 struct timeval rage_tv
; 
 199 #define RAGE_LIMIT_MIN  100 
 200 #define RAGE_TIME_LIMIT 5 
 202 struct mntlist mountlist
;                       /* mounted filesystem list */ 
 203 static int nummounts 
= 0; 
 206 #define VLISTCHECK(fun, vp, list)       \ 
 207         if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \ 
 208                 panic("%s: %s vnode not on %slist", (fun), (list), (list)); 
 210 #define VLISTCHECK(fun, vp, list) 
 211 #endif /* DIAGNOSTIC */ 
 213 #define VLISTNONE(vp)   \ 
 215                 (vp)->v_freelist.tqe_next = (struct vnode *)0;  \ 
 216                 (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb;   \ 
 219 #define VONLIST(vp)     \ 
 220         ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb) 
 222 /* remove a vnode from free vnode list */ 
 223 #define VREMFREE(fun, vp)       \ 
 225                 VLISTCHECK((fun), (vp), "free");        \ 
 226                 TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist);       \ 
 233 /* remove a vnode from dead vnode list */ 
 234 #define VREMDEAD(fun, vp)       \ 
 236                 VLISTCHECK((fun), (vp), "dead");        \ 
 237                 TAILQ_REMOVE(&vnode_dead_list, (vp), v_freelist);       \ 
 239                 vp->v_listflag &= ~VLIST_DEAD;  \ 
 244 /* remove a vnode from rage vnode list */ 
 245 #define VREMRAGE(fun, vp)       \ 
 247                 if ( !(vp->v_listflag & VLIST_RAGE))                    \ 
 248                         panic("VREMRAGE: vp not on rage list");         \ 
 249                 VLISTCHECK((fun), (vp), "rage");                        \ 
 250                 TAILQ_REMOVE(&vnode_rage_list, (vp), v_freelist);       \ 
 252                 vp->v_listflag &= ~VLIST_RAGE;  \ 
 258  * vnodetarget hasn't been used in a long time, but 
 259  * it was exported for some reason... I'm leaving in 
 260  * place for now...  it should be deprecated out of the 
 261  * exports and removed eventually. 
 263 unsigned long vnodetarget
;              /* target for vnreclaim() */ 
 264 #define VNODE_FREE_TARGET       20      /* Default value for vnodetarget */ 
 267  * We need quite a few vnodes on the free list to sustain the 
 268  * rapid stat() the compilation process does, and still benefit from the name 
 269  * cache. Having too few vnodes on the free list causes serious disk 
 270  * thrashing as we cycle through them. 
 272 #define VNODE_FREE_MIN          CONFIG_VNODE_FREE_MIN   /* freelist should have at least this many */ 
 275  * Initialize the vnode management data structures. 
 277 __private_extern__ 
void 
 280         TAILQ_INIT(&vnode_free_list
); 
 281         TAILQ_INIT(&vnode_rage_list
); 
 282         TAILQ_INIT(&vnode_dead_list
); 
 283         TAILQ_INIT(&mountlist
); 
 286                 vnodetarget 
= VNODE_FREE_TARGET
; 
 288         microuptime(&rage_tv
); 
 289         rage_limit 
= desiredvnodes 
/ 100; 
 291         if (rage_limit 
< RAGE_LIMIT_MIN
) 
 292                 rage_limit 
= RAGE_LIMIT_MIN
; 
 295          * Scale the vm_object_cache to accomodate the vnodes  
 298         (void) adjust_vm_object_cache(0, desiredvnodes 
- VNODE_FREE_MIN
); 
 301 /* Reset the VM Object Cache with the values passed in */ 
 302 __private_extern__ kern_return_t
 
 303 reset_vmobjectcache(unsigned int val1
, unsigned int val2
) 
 305         vm_size_t oval 
= val1 
- VNODE_FREE_MIN
; 
 308         if(val2 
< VNODE_FREE_MIN
) 
 311                 nval 
= val2 
- VNODE_FREE_MIN
; 
 313         return(adjust_vm_object_cache(oval
, nval
)); 
 317 /* the timeout is in 10 msecs */ 
 319 vnode_waitforwrites(vnode_t vp
, int output_target
, int slpflag
, int slptimeout
, const char *msg
) { 
 323         KERNEL_DEBUG(0x3010280 | DBG_FUNC_START
, (int)vp
, output_target
, vp
->v_numoutput
, 0, 0); 
 325         if (vp
->v_numoutput 
> output_target
) { 
 331                 while ((vp
->v_numoutput 
> output_target
) && error 
== 0) { 
 333                                 vp
->v_flag 
|= VTHROTTLED
; 
 335                                 vp
->v_flag 
|= VBWAIT
; 
 337                         ts
.tv_sec 
= (slptimeout
/100); 
 338                         ts
.tv_nsec 
= (slptimeout 
% 1000)  * 10 * NSEC_PER_USEC 
* 1000 ; 
 339                         error 
= msleep((caddr_t
)&vp
->v_numoutput
, &vp
->v_lock
, (slpflag 
| (PRIBIO 
+ 1)), msg
, &ts
); 
 343         KERNEL_DEBUG(0x3010280 | DBG_FUNC_END
, (int)vp
, output_target
, vp
->v_numoutput
, error
, 0); 
 350 vnode_startwrite(vnode_t vp
) { 
 352         OSAddAtomic(1, &vp
->v_numoutput
); 
 357 vnode_writedone(vnode_t vp
) 
 360                 OSAddAtomic(-1, &vp
->v_numoutput
); 
 362                 if (vp
->v_numoutput 
<= 1) { 
 367                         if (vp
->v_numoutput 
< 0) 
 368                                 panic("vnode_writedone: numoutput < 0"); 
 370                         if ((vp
->v_flag 
& VTHROTTLED
) && (vp
->v_numoutput 
<= 1)) { 
 371                                 vp
->v_flag 
&= ~VTHROTTLED
; 
 374                         if ((vp
->v_flag 
& VBWAIT
) && (vp
->v_numoutput 
== 0)) { 
 375                                 vp
->v_flag 
&= ~VBWAIT
; 
 381                                 wakeup((caddr_t
)&vp
->v_numoutput
); 
 389 vnode_hasdirtyblks(vnode_t vp
) 
 391         struct cl_writebehind 
*wbp
; 
 394          * Not taking the buf_mtxp as there is little 
 395          * point doing it. Even if the lock is taken the 
 396          * state can change right after that. If their  
 397          * needs to be a synchronization, it must be driven 
 400         if (vp
->v_dirtyblkhd
.lh_first
) 
 403         if (!UBCINFOEXISTS(vp
)) 
 406         wbp 
= vp
->v_ubcinfo
->cl_wbehind
; 
 408         if (wbp 
&& (wbp
->cl_number 
|| wbp
->cl_scmap
)) 
 415 vnode_hascleanblks(vnode_t vp
) 
 418          * Not taking the buf_mtxp as there is little 
 419          * point doing it. Even if the lock is taken the 
 420          * state can change right after that. If their  
 421          * needs to be a synchronization, it must be driven 
 424         if (vp
->v_cleanblkhd
.lh_first
) 
 430 vnode_iterate_setup(mount_t mp
) 
 432         while (mp
->mnt_lflag 
& MNT_LITER
) { 
 433                 mp
->mnt_lflag 
|= MNT_LITERWAIT
; 
 434                 msleep((caddr_t
)mp
, &mp
->mnt_mlock
, PVFS
, "vnode_iterate_setup", NULL
);  
 437         mp
->mnt_lflag 
|= MNT_LITER
; 
 442 vnode_umount_preflight(mount_t mp
, vnode_t skipvp
, int flags
) 
 446         TAILQ_FOREACH(vp
, &mp
->mnt_vnodelist
, v_mntvnodes
) { 
 447                 /* disable preflight only for udf, a hack to be removed after 4073176 is fixed */ 
 448                 if (vp
->v_tag 
== VT_UDF
) 
 450                 if (vp
->v_type 
== VDIR
) 
 454                 if ((flags 
& SKIPSYSTEM
) && ((vp
->v_flag 
& VSYSTEM
) || 
 455             (vp
->v_flag 
& VNOFLUSH
))) 
 457                 if ((flags 
& SKIPSWAP
) && (vp
->v_flag 
& VSWAP
)) 
 459                 if ((flags 
& WRITECLOSE
) && 
 460             (vp
->v_writecount 
== 0 || vp
->v_type 
!= VREG
))  
 462                 /* Look for busy vnode */ 
 463         if (((vp
->v_usecount 
!= 0) && 
 464             ((vp
->v_usecount 
- vp
->v_kusecount
) != 0)))  
 472  * This routine prepares iteration by moving all the vnodes to worker queue 
 473  * called with mount lock held 
 476 vnode_iterate_prepare(mount_t mp
) 
 480         if (TAILQ_EMPTY(&mp
->mnt_vnodelist
)) { 
 485         vp 
= TAILQ_FIRST(&mp
->mnt_vnodelist
); 
 486         vp
->v_mntvnodes
.tqe_prev 
= &(mp
->mnt_workerqueue
.tqh_first
); 
 487         mp
->mnt_workerqueue
.tqh_first 
= mp
->mnt_vnodelist
.tqh_first
; 
 488         mp
->mnt_workerqueue
.tqh_last 
= mp
->mnt_vnodelist
.tqh_last
; 
 490         TAILQ_INIT(&mp
->mnt_vnodelist
); 
 491         if (mp
->mnt_newvnodes
.tqh_first 
!= NULL
) 
 492                 panic("vnode_iterate_prepare: newvnode when entering vnode"); 
 493         TAILQ_INIT(&mp
->mnt_newvnodes
); 
 499 /* called with mount lock held */ 
 501 vnode_iterate_reloadq(mount_t mp
) 
 505         /* add the remaining entries in workerq to the end of mount vnode list */ 
 506         if (!TAILQ_EMPTY(&mp
->mnt_workerqueue
)) { 
 508                 mvp 
= TAILQ_LAST(&mp
->mnt_vnodelist
, vnodelst
); 
 510                 /* Joining the workerque entities to mount vnode list */ 
 512                         mvp
->v_mntvnodes
.tqe_next 
= mp
->mnt_workerqueue
.tqh_first
; 
 514                         mp
->mnt_vnodelist
.tqh_first 
= mp
->mnt_workerqueue
.tqh_first
; 
 515                 mp
->mnt_workerqueue
.tqh_first
->v_mntvnodes
.tqe_prev 
= mp
->mnt_vnodelist
.tqh_last
; 
 516                 mp
->mnt_vnodelist
.tqh_last 
= mp
->mnt_workerqueue
.tqh_last
; 
 517                 TAILQ_INIT(&mp
->mnt_workerqueue
); 
 520         /* add the newvnodes to the head of mount vnode list */ 
 521         if (!TAILQ_EMPTY(&mp
->mnt_newvnodes
)) { 
 523                 nlvp 
= TAILQ_LAST(&mp
->mnt_newvnodes
, vnodelst
); 
 525                 mp
->mnt_newvnodes
.tqh_first
->v_mntvnodes
.tqe_prev 
= &mp
->mnt_vnodelist
.tqh_first
; 
 526                 nlvp
->v_mntvnodes
.tqe_next 
= mp
->mnt_vnodelist
.tqh_first
; 
 527                 if(mp
->mnt_vnodelist
.tqh_first
)  
 528                         mp
->mnt_vnodelist
.tqh_first
->v_mntvnodes
.tqe_prev 
= &nlvp
->v_mntvnodes
.tqe_next
; 
 530                         mp
->mnt_vnodelist
.tqh_last 
= mp
->mnt_newvnodes
.tqh_last
; 
 531                 mp
->mnt_vnodelist
.tqh_first 
= mp
->mnt_newvnodes
.tqh_first
; 
 532                 TAILQ_INIT(&mp
->mnt_newvnodes
); 
 541 vnode_iterate_clear(mount_t mp
) 
 543         mp
->mnt_lflag 
&= ~MNT_LITER
; 
 544         if (mp
->mnt_lflag 
& MNT_LITERWAIT
) { 
 545                 mp
->mnt_lflag 
&= ~MNT_LITERWAIT
; 
 552 vnode_iterate(mount_t mp
, int flags
, int (*callout
)(struct vnode 
*, void *), 
 561         vnode_iterate_setup(mp
); 
 563         /* if it returns 0 then there is nothing to do */ 
 564         retval 
= vnode_iterate_prepare(mp
); 
 567                 vnode_iterate_clear(mp
); 
 572         /* iterate over all the vnodes */ 
 573         while (!TAILQ_EMPTY(&mp
->mnt_workerqueue
)) { 
 574                 vp 
= TAILQ_FIRST(&mp
->mnt_workerqueue
); 
 575                 TAILQ_REMOVE(&mp
->mnt_workerqueue
, vp
, v_mntvnodes
); 
 576                 TAILQ_INSERT_TAIL(&mp
->mnt_vnodelist
, vp
, v_mntvnodes
); 
 578                 if ((vp
->v_data 
== NULL
) || (vp
->v_type 
== VNON
) || (vp
->v_mount 
!= mp
)) { 
 583                 if ( vget_internal(vp
, vid
, (flags 
| VNODE_NODEAD
| VNODE_WITHID 
| VNODE_NOSUSPEND
))) { 
 587                 if (flags 
& VNODE_RELOAD
) { 
 589                          * we're reloading the filesystem 
 590                          * cast out any inactive vnodes... 
 592                         if (vnode_reload(vp
)) { 
 593                                 /* vnode will be recycled on the refcount drop */ 
 600                 retval 
= callout(vp
, arg
); 
 604                   case VNODE_RETURNED_DONE
: 
 606                           if (retval 
== VNODE_RETURNED_DONE
) { 
 613                   case VNODE_CLAIMED_DONE
: 
 625         (void)vnode_iterate_reloadq(mp
); 
 626         vnode_iterate_clear(mp
); 
 632 mount_lock_renames(mount_t mp
) 
 634         lck_mtx_lock(&mp
->mnt_renamelock
); 
 638 mount_unlock_renames(mount_t mp
) 
 640         lck_mtx_unlock(&mp
->mnt_renamelock
); 
 644 mount_lock(mount_t mp
) 
 646         lck_mtx_lock(&mp
->mnt_mlock
); 
 650 mount_unlock(mount_t mp
) 
 652         lck_mtx_unlock(&mp
->mnt_mlock
); 
 657 mount_ref(mount_t mp
, int locked
) 
 670 mount_drop(mount_t mp
, int locked
) 
 677         if (mp
->mnt_count 
== 0 && (mp
->mnt_lflag 
& MNT_LDRAIN
)) 
 678                 wakeup(&mp
->mnt_lflag
); 
 686 mount_iterref(mount_t mp
, int locked
) 
 692         if (mp
->mnt_iterref 
< 0) { 
 703 mount_isdrained(mount_t mp
, int locked
) 
 709         if (mp
->mnt_iterref 
< 0) 
 719 mount_iterdrop(mount_t mp
) 
 723         wakeup(&mp
->mnt_iterref
); 
 728 mount_iterdrain(mount_t mp
) 
 731         while (mp
->mnt_iterref
) 
 732                 msleep((caddr_t
)&mp
->mnt_iterref
, mnt_list_mtx_lock
, PVFS
, "mount_iterdrain", NULL
); 
 733         /* mount iterations drained */ 
 734         mp
->mnt_iterref 
= -1; 
 738 mount_iterreset(mount_t mp
) 
 741         if (mp
->mnt_iterref 
== -1) 
 746 /* always called with  mount lock held */ 
 748 mount_refdrain(mount_t mp
) 
 750         if (mp
->mnt_lflag 
& MNT_LDRAIN
) 
 751                 panic("already in drain"); 
 752         mp
->mnt_lflag 
|= MNT_LDRAIN
; 
 754         while (mp
->mnt_count
) 
 755                 msleep((caddr_t
)&mp
->mnt_lflag
, &mp
->mnt_mlock
, PVFS
, "mount_drain", NULL
); 
 757         if (mp
->mnt_vnodelist
.tqh_first 
!= NULL
) 
 758                  panic("mount_refdrain: dangling vnode");  
 760         mp
->mnt_lflag 
&= ~MNT_LDRAIN
; 
 767  * Mark a mount point as busy. Used to synchronize access and to delay 
 771 vfs_busy(mount_t mp
, int flags
) 
 775         if (mp
->mnt_lflag 
& MNT_LDEAD
) 
 778         if (mp
->mnt_lflag 
& MNT_LUNMOUNT
) { 
 779                 if (flags 
& LK_NOWAIT
) 
 784                 if (mp
->mnt_lflag 
& MNT_LDEAD
) { 
 788                 if (mp
->mnt_lflag 
& MNT_LUNMOUNT
) { 
 789                         mp
->mnt_lflag 
|= MNT_LWAIT
; 
 791                          * Since all busy locks are shared except the exclusive 
 792                          * lock granted when unmounting, the only place that a 
 793                          * wakeup needs to be done is at the release of the 
 794                          * exclusive lock at the end of dounmount. 
 796                         msleep((caddr_t
)mp
, &mp
->mnt_mlock
, (PVFS 
| PDROP
), "vfsbusy", NULL
); 
 802         lck_rw_lock_shared(&mp
->mnt_rwlock
); 
 805          * until we are granted the rwlock, it's possible for the mount point to 
 806          * change state, so reevaluate before granting the vfs_busy 
 808         if (mp
->mnt_lflag 
& (MNT_LDEAD 
| MNT_LUNMOUNT
)) { 
 809                 lck_rw_done(&mp
->mnt_rwlock
); 
 816  * Free a busy filesystem. 
 820 vfs_unbusy(mount_t mp
) 
 822         lck_rw_done(&mp
->mnt_rwlock
); 
 828 vfs_rootmountfailed(mount_t mp
) { 
 831         mp
->mnt_vtable
->vfc_refcount
--; 
 836         mount_lock_destroy(mp
); 
 839         mac_mount_label_destroy(mp
); 
 842         FREE_ZONE(mp
, sizeof(struct mount
), M_MOUNT
); 
 846  * Lookup a filesystem type, and if found allocate and initialize 
 847  * a mount structure for it. 
 849  * Devname is usually updated by mount(8) after booting. 
 852 vfs_rootmountalloc_internal(struct vfstable 
*vfsp
, const char *devname
) 
 856         mp 
= _MALLOC_ZONE((u_long
)sizeof(struct mount
), M_MOUNT
, M_WAITOK
); 
 857         bzero((char *)mp
, (u_long
)sizeof(struct mount
)); 
 859         /* Initialize the default IO constraints */ 
 860         mp
->mnt_maxreadcnt 
= mp
->mnt_maxwritecnt 
= MAXPHYS
; 
 861         mp
->mnt_segreadcnt 
= mp
->mnt_segwritecnt 
= 32; 
 862         mp
->mnt_maxsegreadsize 
= mp
->mnt_maxreadcnt
; 
 863         mp
->mnt_maxsegwritesize 
= mp
->mnt_maxwritecnt
; 
 864         mp
->mnt_devblocksize 
= DEV_BSIZE
; 
 865         mp
->mnt_alignmentmask 
= PAGE_MASK
; 
 867         mp
->mnt_realrootvp 
= NULLVP
; 
 868         mp
->mnt_authcache_ttl 
= CACHED_LOOKUP_RIGHT_TTL
; 
 871         (void)vfs_busy(mp
, LK_NOWAIT
); 
 873         TAILQ_INIT(&mp
->mnt_vnodelist
); 
 874         TAILQ_INIT(&mp
->mnt_workerqueue
); 
 875         TAILQ_INIT(&mp
->mnt_newvnodes
); 
 877         mp
->mnt_vtable 
= vfsp
; 
 878         mp
->mnt_op 
= vfsp
->vfc_vfsops
; 
 879         mp
->mnt_flag 
= MNT_RDONLY 
| MNT_ROOTFS
; 
 880         mp
->mnt_vnodecovered 
= NULLVP
; 
 881         //mp->mnt_stat.f_type = vfsp->vfc_typenum; 
 882         mp
->mnt_flag 
|= vfsp
->vfc_flags 
& MNT_VISFLAGMASK
; 
 885         vfsp
->vfc_refcount
++; 
 888         strncpy(mp
->mnt_vfsstat
.f_fstypename
, vfsp
->vfc_name
, MFSTYPENAMELEN
); 
 889         mp
->mnt_vfsstat
.f_mntonname
[0] = '/'; 
 890         /* XXX const poisoning layering violation */ 
 891         (void) copystr((const void *)devname
, mp
->mnt_vfsstat
.f_mntfromname
, MAXPATHLEN 
- 1, NULL
); 
 894         mac_mount_label_init(mp
); 
 895         mac_mount_label_associate(vfs_context_kernel(), mp
); 
 901 vfs_rootmountalloc(const char *fstypename
, const char *devname
, mount_t 
*mpp
) 
 903         struct vfstable 
*vfsp
; 
 905         for (vfsp 
= vfsconf
; vfsp
; vfsp 
= vfsp
->vfc_next
) 
 906                 if (!strncmp(vfsp
->vfc_name
, fstypename
, 
 907                              sizeof(vfsp
->vfc_name
))) 
 912         *mpp 
= vfs_rootmountalloc_internal(vfsp
, devname
); 
 922  * Find an appropriate filesystem to use for the root. If a filesystem 
 923  * has not been preselected, walk through the list of known filesystems 
 924  * trying those that have mountroot routines, and try them until one 
 925  * works or we have tried them all. 
 927 extern int (*mountroot
)(void); 
 935         struct vfstable 
*vfsp
; 
 936         vfs_context_t ctx 
= vfs_context_kernel(); 
 937         struct vfs_attr vfsattr
; 
 940         vnode_t bdevvp_rootvp
; 
 942         if (mountroot 
!= NULL
) { 
 944                  * used for netboot which follows a different set of rules 
 946                 error 
= (*mountroot
)(); 
 949         if ((error 
= bdevvp(rootdev
, &rootvp
))) { 
 950                 printf("vfs_mountroot: can't setup bdevvp\n"); 
 954          * 4951998 - code we call in vfc_mountroot may replace rootvp  
 955          * so keep a local copy for some house keeping. 
 957         bdevvp_rootvp 
= rootvp
; 
 959         for (vfsp 
= vfsconf
; vfsp
; vfsp 
= vfsp
->vfc_next
) { 
 960                 if (vfsp
->vfc_mountroot 
== NULL
) 
 963                 mp 
= vfs_rootmountalloc_internal(vfsp
, "root_device"); 
 964                 mp
->mnt_devvp 
= rootvp
; 
 966                 if ((error 
= (*vfsp
->vfc_mountroot
)(mp
, rootvp
, ctx
)) == 0) { 
 967                         if ( bdevvp_rootvp 
!= rootvp 
) { 
 970                                  *   bump the iocount and fix up mnt_devvp for the 
 971                                  *   new rootvp (it will already have a usecount taken)... 
 972                                  *   drop the iocount and the usecount on the original 
 973                                  *   since we are no longer going to use it... 
 975                                 vnode_getwithref(rootvp
); 
 976                                 mp
->mnt_devvp 
= rootvp
; 
 978                                 vnode_rele(bdevvp_rootvp
); 
 979                                 vnode_put(bdevvp_rootvp
); 
 981                         mp
->mnt_devvp
->v_specflags 
|= SI_MOUNTEDON
; 
 988                          *   cache the IO attributes for the underlying physical media... 
 989                          *   an error return indicates the underlying driver doesn't 
 990                          *   support all the queries necessary... however, reasonable 
 991                          *   defaults will have been set, so no reason to bail or care 
 993                         vfs_init_io_attributes(rootvp
, mp
); 
 996                          * Shadow the VFC_VFSNATIVEXATTR flag to MNTK_EXTENDED_ATTRS. 
 998                         if (mp
->mnt_vtable
->vfc_vfsflags 
& VFC_VFSNATIVEXATTR
) { 
 999                                 mp
->mnt_kern_flag 
|= MNTK_EXTENDED_ATTRS
; 
1001                         if (mp
->mnt_vtable
->vfc_vfsflags 
& VFC_VFSPREFLIGHT
) { 
1002                                 mp
->mnt_kern_flag 
|= MNTK_UNMOUNT_PREFLIGHT
; 
1006                          * Probe root file system for additional features. 
1008                         (void)VFS_START(mp
, 0, ctx
); 
1010                         VFSATTR_INIT(&vfsattr
); 
1011                         VFSATTR_WANTED(&vfsattr
, f_capabilities
); 
1012                         if (vfs_getattr(mp
, &vfsattr
, ctx
) == 0 &&  
1013                             VFSATTR_IS_SUPPORTED(&vfsattr
, f_capabilities
)) { 
1014                                 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
) && 
1015                                     (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_EXTENDED_ATTR
)) { 
1016                                         mp
->mnt_kern_flag 
|= MNTK_EXTENDED_ATTRS
; 
1019                                 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
) && 
1020                                     (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_INTERFACES
] & VOL_CAP_INT_NAMEDSTREAMS
)) { 
1021                                         mp
->mnt_kern_flag 
|= MNTK_NAMED_STREAMS
; 
1024                                 if ((vfsattr
.f_capabilities
.capabilities
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
) && 
1025                                     (vfsattr
.f_capabilities
.valid
[VOL_CAPABILITIES_FORMAT
] & VOL_CAP_FMT_PATH_FROM_ID
)) { 
1026                                         mp
->mnt_kern_flag 
|= MNTK_PATH_FROM_ID
; 
1031                          * get rid of iocount reference returned 
1032                          * by bdevvp (or picked up by us on the substituted 
1033                          * rootvp)... it (or we) will have also taken 
1034                          * a usecount reference which we want to keep 
1039                         if ((vfs_flags(mp
) & MNT_MULTILABEL
) == 0) 
1042                         error 
= VFS_ROOT(mp
, &vp
, ctx
); 
1044                                 printf("%s() VFS_ROOT() returned %d\n", 
1046                                 dounmount(mp
, MNT_FORCE
, 0, ctx
); 
1050                         /* VFS_ROOT provides reference so flags = 0 */ 
1051                         error 
= vnode_label(mp
, NULL
, vp
, NULL
, 0, ctx
); 
1053                                 printf("%s() vnode_label() returned %d\n", 
1055                                 dounmount(mp
, MNT_FORCE
, 0, ctx
); 
1064                 vfs_rootmountfailed(mp
); 
1066                 if (error 
!= EINVAL
) 
1067                         printf("%s_mountroot failed: %d\n", vfsp
->vfc_name
, error
); 
1073  * Lookup a mount point by filesystem identifier. 
1075 extern mount_t 
vfs_getvfs_locked(fsid_t 
*); 
1078 vfs_getvfs(fsid_t 
*fsid
) 
1080         return (mount_list_lookupby_fsid(fsid
, 0, 0)); 
1084 vfs_getvfs_locked(fsid_t 
*fsid
) 
1086         return(mount_list_lookupby_fsid(fsid
, 1, 0)); 
1090 vfs_getvfs_by_mntonname(char *path
) 
1092         mount_t retmp 
= (mount_t
)0; 
1096         TAILQ_FOREACH(mp
, &mountlist
, mnt_list
) { 
1097                 if (!strncmp(mp
->mnt_vfsstat
.f_mntonname
, path
, 
1098                                         sizeof(mp
->mnt_vfsstat
.f_mntonname
))) { 
1104         mount_list_unlock(); 
1108 /* generation number for creation of new fsids */ 
1109 u_short mntid_gen 
= 0; 
1111  * Get a new unique fsid 
1114 vfs_getnewfsid(struct mount 
*mp
) 
1123         /* generate a new fsid */ 
1124         mtype 
= mp
->mnt_vtable
->vfc_typenum
; 
1125         if (++mntid_gen 
== 0) 
1127         tfsid
.val
[0] = makedev(nblkdev 
+ mtype
, mntid_gen
); 
1128         tfsid
.val
[1] = mtype
; 
1130         TAILQ_FOREACH(nmp
, &mountlist
, mnt_list
) { 
1131                 while (vfs_getvfs_locked(&tfsid
)) { 
1132                         if (++mntid_gen 
== 0) 
1134                         tfsid
.val
[0] = makedev(nblkdev 
+ mtype
, mntid_gen
); 
1137         mp
->mnt_vfsstat
.f_fsid
.val
[0] = tfsid
.val
[0]; 
1138         mp
->mnt_vfsstat
.f_fsid
.val
[1] = tfsid
.val
[1]; 
1139         mount_list_unlock(); 
1143  * Routines having to do with the management of the vnode table. 
1145 extern int (**dead_vnodeop_p
)(void *); 
1146 long numvnodes
, freevnodes
, deadvnodes
; 
1150  * Move a vnode from one mount queue to another. 
1153 insmntque(vnode_t vp
, mount_t mp
) 
1157          * Delete from old mount point vnode list, if on one. 
1159         if ( (lmp 
= vp
->v_mount
) != NULL 
&& lmp 
!= dead_mountp
) { 
1160                 if ((vp
->v_lflag 
& VNAMED_MOUNT
) == 0) 
1161                         panic("insmntque: vp not in mount vnode list"); 
1162                 vp
->v_lflag 
&= ~VNAMED_MOUNT
; 
1168                 if (vp
->v_mntvnodes
.tqe_next 
== NULL
) { 
1169                         if (TAILQ_LAST(&lmp
->mnt_vnodelist
, vnodelst
) == vp
) 
1170                                 TAILQ_REMOVE(&lmp
->mnt_vnodelist
, vp
, v_mntvnodes
); 
1171                         else if (TAILQ_LAST(&lmp
->mnt_newvnodes
, vnodelst
) == vp
) 
1172                                 TAILQ_REMOVE(&lmp
->mnt_newvnodes
, vp
, v_mntvnodes
); 
1173                         else if (TAILQ_LAST(&lmp
->mnt_workerqueue
, vnodelst
) == vp
) 
1174                                 TAILQ_REMOVE(&lmp
->mnt_workerqueue
, vp
, v_mntvnodes
); 
1176                         vp
->v_mntvnodes
.tqe_next
->v_mntvnodes
.tqe_prev 
= vp
->v_mntvnodes
.tqe_prev
; 
1177                         *vp
->v_mntvnodes
.tqe_prev 
= vp
->v_mntvnodes
.tqe_next
; 
1179                 vp
->v_mntvnodes
.tqe_next 
= NULL
; 
1180                 vp
->v_mntvnodes
.tqe_prev 
= NULL
; 
1186          * Insert into list of vnodes for the new mount point, if available. 
1188         if ((vp
->v_mount 
= mp
) != NULL
) { 
1190                 if ((vp
->v_mntvnodes
.tqe_next 
!= 0) && (vp
->v_mntvnodes
.tqe_prev 
!= 0)) 
1191                         panic("vp already in mount list"); 
1192                 if (mp
->mnt_lflag 
& MNT_LITER
) 
1193                         TAILQ_INSERT_HEAD(&mp
->mnt_newvnodes
, vp
, v_mntvnodes
); 
1195                         TAILQ_INSERT_HEAD(&mp
->mnt_vnodelist
, vp
, v_mntvnodes
); 
1196                 if (vp
->v_lflag 
& VNAMED_MOUNT
) 
1197                         panic("insmntque: vp already in mount vnode list"); 
1198                 if ((vp
->v_freelist
.tqe_prev 
!= (struct vnode 
**)0xdeadb)) 
1199                         panic("insmntque: vp on the free list\n"); 
1200                 vp
->v_lflag 
|= VNAMED_MOUNT
; 
1208  * Create a vnode for a block device. 
1209  * Used for root filesystem, argdev, and swap areas. 
1210  * Also used for memory file system special devices. 
1213 bdevvp(dev_t dev
, vnode_t 
*vpp
) 
1217         struct vnode_fsparam vfsp
; 
1218         struct vfs_context context
; 
1225         context
.vc_thread 
= current_thread(); 
1226         context
.vc_ucred 
= FSCRED
; 
1228         vfsp
.vnfs_mp 
= (struct mount 
*)0; 
1229         vfsp
.vnfs_vtype 
= VBLK
; 
1230         vfsp
.vnfs_str 
= "bdevvp"; 
1231         vfsp
.vnfs_dvp 
= NULL
; 
1232         vfsp
.vnfs_fsnode 
= NULL
; 
1233         vfsp
.vnfs_cnp 
= NULL
; 
1234         vfsp
.vnfs_vops 
= spec_vnodeop_p
; 
1235         vfsp
.vnfs_rdev 
= dev
; 
1236         vfsp
.vnfs_filesize 
= 0; 
1238         vfsp
.vnfs_flags 
= VNFS_NOCACHE 
| VNFS_CANTCACHE
; 
1240         vfsp
.vnfs_marksystem 
= 0; 
1241         vfsp
.vnfs_markroot 
= 0; 
1243         if ( (error 
= vnode_create(VNCREATE_FLAVOR
, VCREATESIZE
, &vfsp
, &nvp
)) ) { 
1247         vnode_lock_spin(nvp
); 
1248         nvp
->v_flag 
|= VBDEVVP
; 
1249         nvp
->v_tag 
= VT_NON
;    /* set this to VT_NON so during aliasing it can be replaced */ 
1251         if ( (error 
= vnode_ref(nvp
)) ) { 
1252                 panic("bdevvp failed: vnode_ref"); 
1255         if ( (error 
= VNOP_FSYNC(nvp
, MNT_WAIT
, &context
)) ) { 
1256                 panic("bdevvp failed: fsync"); 
1259         if ( (error 
= buf_invalidateblks(nvp
, BUF_WRITE_DATA
, 0, 0)) ) { 
1260                 panic("bdevvp failed: invalidateblks"); 
1266          * XXXMAC: We can't put a MAC check here, the system will 
1267          * panic without this vnode. 
1271         if ( (error 
= VNOP_OPEN(nvp
, FREAD
, &context
)) ) { 
1272                 panic("bdevvp failed: open"); 
1281  * Check to see if the new vnode represents a special device 
1282  * for which we already have a vnode (either because of 
1283  * bdevvp() or because of a different vnode representing 
1284  * the same block device). If such an alias exists, deallocate 
1285  * the existing contents and return the aliased vnode. The 
1286  * caller is responsible for filling it with its new contents. 
1289 checkalias(struct vnode 
*nvp
, dev_t nvp_rdev
) 
1295         vpp 
= &speclisth
[SPECHASH(nvp_rdev
)]; 
1299         for (vp 
= *vpp
; vp
; vp 
= vp
->v_specnext
) { 
1300                 if (nvp_rdev 
== vp
->v_rdev 
&& nvp
->v_type 
== vp
->v_type
) { 
1308                 if (vnode_getwithvid(vp
,vid
)) { 
1312                  * Termination state is checked in vnode_getwithvid 
1317                  * Alias, but not in use, so flush it out. 
1319                 if ((vp
->v_iocount 
== 1) && (vp
->v_usecount 
== 0)) { 
1320                         vnode_reclaim_internal(vp
, 1, 1, 0); 
1321                         vnode_put_locked(vp
); 
1326         if (vp 
== NULL 
|| vp
->v_tag 
!= VT_NON
) { 
1328                 MALLOC_ZONE(nvp
->v_specinfo
, struct specinfo 
*, sizeof(struct specinfo
), 
1329                             M_SPECINFO
, M_WAITOK
); 
1330                 bzero(nvp
->v_specinfo
, sizeof(struct specinfo
)); 
1331                 nvp
->v_rdev 
= nvp_rdev
; 
1332                 nvp
->v_specflags 
= 0; 
1333                 nvp
->v_speclastr 
= -1; 
1336                 nvp
->v_hashchain 
= vpp
; 
1337                 nvp
->v_specnext 
= *vpp
; 
1342                         nvp
->v_flag 
|= VALIASED
; 
1343                         vp
->v_flag 
|= VALIASED
; 
1344                         vnode_put_locked(vp
); 
1349         if ((vp
->v_flag 
& (VBDEVVP 
| VDEVFLUSH
)) != 0) 
1352                 panic("checkalias with VT_NON vp that shouldn't: %x", (unsigned int)vp
); 
1360  * Get a reference on a particular vnode and lock it if requested. 
1361  * If the vnode was on the inactive list, remove it from the list. 
1362  * If the vnode was on the free list, remove it from the list and 
1363  * move it to inactive list as needed. 
1364  * The vnode lock bit is set if the vnode is being eliminated in 
1365  * vgone. The process is awakened when the transition is completed, 
1366  * and an error returned to indicate that the vnode is no longer 
1367  * usable (possibly having been changed to a new file system type). 
1370 vget_internal(vnode_t vp
, int vid
, int vflags
) 
1375         vnode_lock_spin(vp
); 
1377         if (vflags 
& VNODE_WITHID
) 
1380                 vpid 
= vp
->v_id
;    // save off the original v_id 
1382         if ((vflags 
& VNODE_WRITEABLE
) && (vp
->v_writecount 
== 0)) 
1384                  * vnode to be returned only if it has writers opened  
1388                 error 
= vnode_getiocount(vp
, vpid
, vflags
); 
1396  * Returns:     0                       Success 
1397  *              ENOENT                  No such file or directory [terminating] 
1400 vnode_ref(vnode_t vp
) 
1403         return (vnode_ref_ext(vp
, 0)); 
1407  * Returns:     0                       Success 
1408  *              ENOENT                  No such file or directory [terminating] 
1411 vnode_ref_ext(vnode_t vp
, int fmode
) 
1415         vnode_lock_spin(vp
); 
1418          * once all the current call sites have been fixed to insure they have 
1419          * taken an iocount, we can toughen this assert up and insist that the 
1420          * iocount is non-zero... a non-zero usecount doesn't insure correctness 
1422         if (vp
->v_iocount 
<= 0 && vp
->v_usecount 
<= 0)  
1423                 panic("vnode_ref_ext: vp %p has no valid reference %d, %d", vp
, vp
->v_iocount
, vp
->v_usecount
); 
1426          * if you are the owner of drain/termination, can acquire usecount 
1428         if ((vp
->v_lflag 
& (VL_DRAIN 
| VL_TERMINATE 
| VL_DEAD
))) { 
1429                 if (vp
->v_owner 
!= current_thread()) { 
1436         if (fmode 
& FWRITE
) { 
1437                 if (++vp
->v_writecount 
<= 0) 
1438                         panic("vnode_ref_ext: v_writecount"); 
1440         if (fmode 
& O_EVTONLY
) { 
1441                 if (++vp
->v_kusecount 
<= 0) 
1442                         panic("vnode_ref_ext: v_kusecount"); 
1444         if (vp
->v_flag 
& VRAGE
) { 
1447                 ut 
= get_bsdthread_info(current_thread()); 
1449                 if ( !(current_proc()->p_lflag 
& P_LRAGE_VNODES
) && 
1450                      !(ut
->uu_flag 
& UT_RAGE_VNODES
)) { 
1452                          * a 'normal' process accessed this vnode 
1453                          * so make sure its no longer marked 
1454                          * for rapid aging...  also, make sure 
1455                          * it gets removed from the rage list... 
1456                          * when v_usecount drops back to 0, it 
1457                          * will be put back on the real free list 
1459                         vp
->v_flag 
&= ~VRAGE
; 
1460                         vp
->v_references 
= 0; 
1461                         vnode_list_remove(vp
); 
1472  * put the vnode on appropriate free list. 
1473  * called with vnode LOCKED 
1476 vnode_list_add(vnode_t vp
) 
1479          * if it is already on a list or non zero references return  
1481         if (VONLIST(vp
) || (vp
->v_usecount 
!= 0) || (vp
->v_iocount 
!= 0)) 
1485         if ((vp
->v_flag 
& VRAGE
) && !(vp
->v_lflag 
& VL_DEAD
)) { 
1487                  * add the new guy to the appropriate end of the RAGE list 
1489                 if ((vp
->v_flag 
& VAGE
)) 
1490                         TAILQ_INSERT_HEAD(&vnode_rage_list
, vp
, v_freelist
); 
1492                         TAILQ_INSERT_TAIL(&vnode_rage_list
, vp
, v_freelist
); 
1494                 vp
->v_listflag 
|= VLIST_RAGE
; 
1498                  * reset the timestamp for the last inserted vp on the RAGE 
1499                  * queue to let new_vnode know that its not ok to start stealing 
1500                  * from this list... as long as we're actively adding to this list 
1501                  * we'll push out the vnodes we want to donate to the real free list 
1502                  * once we stop pushing, we'll let some time elapse before we start 
1503                  * stealing them in the new_vnode routine 
1505                 microuptime(&rage_tv
); 
1508                  * if VL_DEAD, insert it at head of the dead list 
1509                  * else insert at tail of LRU list or at head if VAGE is set 
1511                 if ( (vp
->v_lflag 
& VL_DEAD
)) { 
1512                         TAILQ_INSERT_HEAD(&vnode_dead_list
, vp
, v_freelist
); 
1513                         vp
->v_listflag 
|= VLIST_DEAD
; 
1515                 } else if ((vp
->v_flag 
& VAGE
)) { 
1516                         TAILQ_INSERT_HEAD(&vnode_free_list
, vp
, v_freelist
); 
1517                         vp
->v_flag 
&= ~VAGE
; 
1520                         TAILQ_INSERT_TAIL(&vnode_free_list
, vp
, v_freelist
); 
1524         vnode_list_unlock(); 
1529  * remove the vnode from appropriate free list. 
1530  * called with vnode LOCKED and 
1531  * the list lock held 
1534 vnode_list_remove_locked(vnode_t vp
) 
1538                  * the v_listflag field is 
1539                  * protected by the vnode_list_lock 
1541                 if (vp
->v_listflag 
& VLIST_RAGE
) 
1542                         VREMRAGE("vnode_list_remove", vp
); 
1543                 else if (vp
->v_listflag 
& VLIST_DEAD
) 
1544                         VREMDEAD("vnode_list_remove", vp
); 
1546                         VREMFREE("vnode_list_remove", vp
); 
1552  * remove the vnode from appropriate free list. 
1553  * called with vnode LOCKED 
1556 vnode_list_remove(vnode_t vp
) 
1559          * we want to avoid taking the list lock 
1560          * in the case where we're not on the free 
1561          * list... this will be true for most 
1562          * directories and any currently in use files 
1564          * we're guaranteed that we can't go from 
1565          * the not-on-list state to the on-list  
1566          * state since we hold the vnode lock... 
1567          * all calls to vnode_list_add are done 
1568          * under the vnode lock... so we can 
1569          * check for that condition (the prevelant one) 
1570          * without taking the list lock 
1575                  * however, we're not guaranteed that 
1576                  * we won't go from the on-list state 
1577                  * to the not-on-list state until we 
1578                  * hold the vnode_list_lock... this  
1579                  * is due to "new_vnode" removing vnodes 
1580                  * from the free list uder the list_lock 
1581                  * w/o the vnode lock... so we need to 
1582                  * check again whether we're currently 
1585                 vnode_list_remove_locked(vp
); 
1587                 vnode_list_unlock(); 
1593 vnode_rele(vnode_t vp
) 
1595         vnode_rele_internal(vp
, 0, 0, 0); 
1600 vnode_rele_ext(vnode_t vp
, int fmode
, int dont_reenter
) 
1602         vnode_rele_internal(vp
, fmode
, dont_reenter
, 0); 
1607 vnode_rele_internal(vnode_t vp
, int fmode
, int dont_reenter
, int locked
) 
1610                 vnode_lock_spin(vp
); 
1612         if (--vp
->v_usecount 
< 0) 
1613                 panic("vnode_rele_ext: vp %p usecount -ve : %d", vp
,  vp
->v_usecount
); 
1615         if (fmode 
& FWRITE
) { 
1616                 if (--vp
->v_writecount 
< 0) 
1617                         panic("vnode_rele_ext: vp %p writecount -ve : %ld", vp
,  vp
->v_writecount
); 
1619         if (fmode 
& O_EVTONLY
) { 
1620                 if (--vp
->v_kusecount 
< 0) 
1621                         panic("vnode_rele_ext: vp %p kusecount -ve : %d", vp
,  vp
->v_kusecount
); 
1623         if (vp
->v_kusecount 
> vp
->v_usecount
) 
1624                 panic("vnode_rele_ext: vp %p kusecount(%d) out of balance with usecount(%d)\n",vp
, vp
->v_kusecount
, vp
->v_usecount
); 
1625         if ((vp
->v_iocount 
> 0) || (vp
->v_usecount 
> 0)) { 
1627                  * vnode is still busy... if we're the last 
1628                  * usecount, mark for a future call to VNOP_INACTIVE 
1629                  * when the iocount finally drops to 0 
1631                 if (vp
->v_usecount 
== 0) { 
1632                         vp
->v_lflag 
|= VL_NEEDINACTIVE
; 
1633                         vp
->v_flag  
&= ~(VNOCACHE_DATA 
| VRAOFF 
| VOPENEVT
); 
1639         vp
->v_flag  
&= ~(VNOCACHE_DATA 
| VRAOFF 
| VOPENEVT
); 
1641         if ( (vp
->v_lflag 
& (VL_TERMINATE 
| VL_DEAD
)) || dont_reenter
) { 
1643                  * vnode is being cleaned, or 
1644                  * we've requested that we don't reenter 
1645                  * the filesystem on this release... in 
1646                  * this case, we'll mark the vnode aged 
1647                  * if it's been marked for termination 
1650                         if ( !(vp
->v_lflag 
& (VL_TERMINATE 
| VL_DEAD 
| VL_MARKTERM
)) ) 
1651                                 vp
->v_lflag 
|= VL_NEEDINACTIVE
; 
1660          * at this point both the iocount and usecount 
1662          * pick up an iocount so that we can call 
1663          * VNOP_INACTIVE with the vnode lock unheld 
1669         vp
->v_lflag 
&= ~VL_NEEDINACTIVE
; 
1672         VNOP_INACTIVE(vp
, vfs_context_current()); 
1674         vnode_lock_spin(vp
); 
1676          * because we dropped the vnode lock to call VNOP_INACTIVE 
1677          * the state of the vnode may have changed... we may have 
1678          * picked up an iocount, usecount or the MARKTERM may have 
1679          * been set... we need to reevaluate the reference counts 
1680          * to determine if we can call vnode_reclaim_internal at 
1681          * this point... if the reference counts are up, we'll pick 
1682          * up the MARKTERM state when they get subsequently dropped 
1684         if ( (vp
->v_iocount 
== 1) && (vp
->v_usecount 
== 0) && 
1685              ((vp
->v_lflag 
& (VL_MARKTERM 
| VL_TERMINATE 
| VL_DEAD
)) == VL_MARKTERM
)) { 
1688                 ut 
= get_bsdthread_info(current_thread()); 
1690                 if (ut
->uu_defer_reclaims
) { 
1691                         vp
->v_defer_reclaimlist 
= ut
->uu_vreclaims
; 
1692                                 ut
->uu_vreclaims 
= vp
; 
1695                 vnode_lock_convert(vp
); 
1696                 vnode_reclaim_internal(vp
, 1, 1, 0); 
1698         vnode_dropiocount(vp
); 
1707  * Remove any vnodes in the vnode table belonging to mount point mp. 
1709  * If MNT_NOFORCE is specified, there should not be any active ones, 
1710  * return error if any are found (nb: this is a user error, not a 
1711  * system error). If MNT_FORCE is specified, detach any active vnodes 
1715 int busyprt 
= 0;        /* print out busy vnodes */ 
1717 struct ctldebug debug1 
= { "busyprt", &busyprt 
}; 
1722 vflush(struct mount 
*mp
, struct vnode 
*skipvp
, int flags
) 
1731         vnode_iterate_setup(mp
); 
1733          * On regular unmounts(not forced) do a 
1734          * quick check for vnodes to be in use. This 
1735          * preserves the caching of vnodes. automounter 
1736          * tries unmounting every so often to see whether 
1737          * it is still busy or not. 
1739         if (((flags 
& FORCECLOSE
)==0)  && ((mp
->mnt_kern_flag 
& MNTK_UNMOUNT_PREFLIGHT
) != 0)) { 
1740                 if (vnode_umount_preflight(mp
, skipvp
, flags
)) { 
1741                         vnode_iterate_clear(mp
); 
1747         /* it is returns 0 then there is nothing to do */ 
1748         retval 
= vnode_iterate_prepare(mp
); 
1751                 vnode_iterate_clear(mp
); 
1756     /* iterate over all the vnodes */ 
1757     while (!TAILQ_EMPTY(&mp
->mnt_workerqueue
)) { 
1758         vp 
= TAILQ_FIRST(&mp
->mnt_workerqueue
); 
1759         TAILQ_REMOVE(&mp
->mnt_workerqueue
, vp
, v_mntvnodes
); 
1760         TAILQ_INSERT_TAIL(&mp
->mnt_vnodelist
, vp
, v_mntvnodes
); 
1761         if ( (vp
->v_mount 
!= mp
) || (vp 
== skipvp
)) { 
1768                 if ((vp
->v_id 
!= vid
) || ((vp
->v_lflag 
& (VL_DEAD 
| VL_TERMINATE
)))) { 
1775                  * If requested, skip over vnodes marked VSYSTEM. 
1776                  * Skip over all vnodes marked VNOFLUSH. 
1778                 if ((flags 
& SKIPSYSTEM
) && ((vp
->v_flag 
& VSYSTEM
) || 
1779                     (vp
->v_flag 
& VNOFLUSH
))) { 
1785                  * If requested, skip over vnodes marked VSWAP. 
1787                 if ((flags 
& SKIPSWAP
) && (vp
->v_flag 
& VSWAP
)) { 
1793                  * If requested, skip over vnodes marked VSWAP. 
1795                 if ((flags 
& SKIPROOT
) && (vp
->v_flag 
& VROOT
)) { 
1801                  * If WRITECLOSE is set, only flush out regular file 
1802                  * vnodes open for writing. 
1804                 if ((flags 
& WRITECLOSE
) && 
1805                     (vp
->v_writecount 
== 0 || vp
->v_type 
!= VREG
)) { 
1811                  * If the real usecount is 0, all we need to do is clear 
1812                  * out the vnode data structures and we are done. 
1814                 if (((vp
->v_usecount 
== 0) || 
1815                     ((vp
->v_usecount 
- vp
->v_kusecount
) == 0))) { 
1816                         vp
->v_iocount
++;        /* so that drain waits for * other iocounts */ 
1820                         vnode_reclaim_internal(vp
, 1, 1, 0); 
1821                         vnode_dropiocount(vp
); 
1830                  * If FORCECLOSE is set, forcibly close the vnode. 
1831                  * For block or character devices, revert to an 
1832                  * anonymous device. For all other files, just kill them. 
1834                 if (flags 
& FORCECLOSE
) { 
1835                         if (vp
->v_type 
!= VBLK 
&& vp
->v_type 
!= VCHR
) { 
1836                                 vp
->v_iocount
++;        /* so that drain waits * for other iocounts */ 
1840                                 vnode_reclaim_internal(vp
, 1, 1, 0); 
1841                                 vnode_dropiocount(vp
); 
1846                                 vp
->v_lflag 
&= ~VL_DEAD
; 
1847                                 vp
->v_op 
= spec_vnodeop_p
; 
1848                                 vp
->v_flag 
|= VDEVFLUSH
; 
1856                         vprint("vflush: busy vnode", vp
); 
1863         /* At this point the worker queue is completed */ 
1864         if (busy 
&& ((flags 
& FORCECLOSE
)==0) && reclaimed
) { 
1867                 (void)vnode_iterate_reloadq(mp
); 
1868                 /* returned with mount lock held */ 
1872         /* if new vnodes were created in between retry the reclaim */ 
1873         if ( vnode_iterate_reloadq(mp
) != 0) { 
1874                 if (!(busy 
&& ((flags 
& FORCECLOSE
)==0))) 
1877         vnode_iterate_clear(mp
); 
1880         if (busy 
&& ((flags 
& FORCECLOSE
)==0)) 
1885 long num_recycledvnodes 
= 0;    /* long for OSAddAtomic */ 
1887  * Disassociate the underlying file system from a vnode. 
1888  * The vnode lock is held on entry. 
1891 vclean(vnode_t vp
, int flags
) 
1893         vfs_context_t ctx 
= vfs_context_current(); 
1896         int already_terminating
; 
1904          * Check to see if the vnode is in use. 
1905          * If so we have to reference it before we clean it out 
1906          * so that its count cannot fall to zero and generate a 
1907          * race against ourselves to recycle it. 
1909         active 
= vp
->v_usecount
; 
1912          * just in case we missed sending a needed 
1913          * VNOP_INACTIVE, we'll do it now 
1915         need_inactive 
= (vp
->v_lflag 
& VL_NEEDINACTIVE
); 
1917         vp
->v_lflag 
&= ~VL_NEEDINACTIVE
; 
1920          * Prevent the vnode from being recycled or 
1921          * brought into use while we clean it out. 
1923         already_terminating 
= (vp
->v_lflag 
& VL_TERMINATE
); 
1925         vp
->v_lflag 
|= VL_TERMINATE
; 
1928          * remove the vnode from any mount list 
1931         insmntque(vp
, (struct mount 
*)0); 
1934         is_namedstream 
= vnode_isnamedstream(vp
); 
1939         OSAddAtomic(1, &num_recycledvnodes
); 
1941          * purge from the name cache as early as possible... 
1945         if (flags 
& DOCLOSE
) 
1946                 clflags 
|= IO_NDELAY
; 
1947         if (flags 
& REVOKEALL
) 
1948                 clflags 
|= IO_REVOKE
; 
1950         if (active 
&& (flags 
& DOCLOSE
)) 
1951                 VNOP_CLOSE(vp
, clflags
, ctx
); 
1954          * Clean out any buffers associated with the vnode. 
1956         if (flags 
& DOCLOSE
) { 
1958                 if (vp
->v_tag 
== VT_NFS
) 
1959                         nfs_vinvalbuf(vp
, V_SAVE
, ctx
, 0); 
1963                         VNOP_FSYNC(vp
, MNT_WAIT
, ctx
); 
1964                         buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0); 
1966                 if (UBCINFOEXISTS(vp
)) 
1968                          * Clean the pages in VM. 
1970                         (void)ubc_sync_range(vp
, (off_t
)0, ubc_getsize(vp
), UBC_PUSHALL
); 
1972         if (active 
|| need_inactive
)  
1973                 VNOP_INACTIVE(vp
, ctx
); 
1976         /* Delete the shadow stream file before we reclaim its vnode */ 
1977         if ((is_namedstream 
!= 0) && 
1978                         (vp
->v_parent 
!= NULLVP
) && 
1979                         ((vp
->v_parent
->v_mount
->mnt_kern_flag 
& MNTK_NAMED_STREAMS
) == 0)) { 
1980                 vnode_relenamedstream(vp
->v_parent
, vp
, ctx
); 
1985          * Destroy ubc named reference 
1986          * cluster_release is done on this path 
1987          * along with dropping the reference on the ucred 
1989         ubc_destroy_named(vp
); 
1992          * Reclaim the vnode. 
1994         if (VNOP_RECLAIM(vp
, ctx
)) 
1995                 panic("vclean: cannot reclaim"); 
1997         // make sure the name & parent ptrs get cleaned out! 
1998         vnode_update_identity(vp
, NULLVP
, NULL
, 0, 0, VNODE_UPDATE_PARENT 
| VNODE_UPDATE_NAME
); 
2002         vp
->v_mount 
= dead_mountp
; 
2003         vp
->v_op 
= dead_vnodeop_p
; 
2007         vp
->v_lflag 
|= VL_DEAD
; 
2009         if (already_terminating 
== 0) { 
2010                 vp
->v_lflag 
&= ~VL_TERMINATE
; 
2012                  * Done with purge, notify sleepers of the grim news. 
2014                 if (vp
->v_lflag 
& VL_TERMWANT
) { 
2015                         vp
->v_lflag 
&= ~VL_TERMWANT
; 
2016                         wakeup(&vp
->v_lflag
); 
2022  * Eliminate all activity associated with  the requested vnode 
2023  * and with all vnodes aliased to the requested vnode. 
2027 vn_revoke(vnode_t vp
, int flags
, __unused vfs_context_t a_context
) 
2029 vn_revoke(vnode_t vp
, __unused 
int flags
, __unused vfs_context_t a_context
) 
2036         if ((flags 
& REVOKEALL
) == 0) 
2037                 panic("vnop_revoke"); 
2040         if (vp
->v_flag 
& VALIASED
) { 
2042                  * If a vgone (or vclean) is already in progress, 
2043                  * wait until it is done and return. 
2046                 if (vp
->v_lflag 
& VL_TERMINATE
) { 
2052                  * Ensure that vp will not be vgone'd while we 
2053                  * are eliminating its aliases. 
2056                 while (vp
->v_flag 
& VALIASED
) { 
2057                         for (vq 
= *vp
->v_hashchain
; vq
; vq 
= vq
->v_specnext
) { 
2058                                 if (vq
->v_rdev 
!= vp
->v_rdev 
|| 
2059                                     vq
->v_type 
!= vp
->v_type 
|| vp 
== vq
) 
2063                                 if (vnode_getwithvid(vq
,vid
)){ 
2067                                 vnode_reclaim_internal(vq
, 0, 1, 0); 
2075         vnode_reclaim_internal(vp
, 0, 0, REVOKEALL
); 
2081  * Recycle an unused vnode to the front of the free list. 
2082  * Release the passed interlock if the vnode will be recycled. 
2085 vnode_recycle(struct vnode 
*vp
) 
2089         if (vp
->v_iocount 
|| vp
->v_usecount
) { 
2090                 vp
->v_lflag 
|= VL_MARKTERM
; 
2094         vnode_reclaim_internal(vp
, 1, 0, 0); 
2102 vnode_reload(vnode_t vp
) 
2104         vnode_lock_spin(vp
); 
2106         if ((vp
->v_iocount 
> 1) || vp
->v_usecount
) { 
2110         if (vp
->v_iocount 
<= 0) 
2111                 panic("vnode_reload with no iocount %d", vp
->v_iocount
); 
2113         /* mark for release when iocount is dopped */ 
2114         vp
->v_lflag 
|= VL_MARKTERM
; 
2122 vgone(vnode_t vp
, int flags
) 
2128          * Clean out the filesystem specific data. 
2129          * vclean also takes care of removing the 
2130          * vnode from any mount list it might be on 
2132         vclean(vp
, flags 
| DOCLOSE
); 
2135          * If special device, remove it from special device alias list 
2138         if ((vp
->v_type 
== VBLK 
|| vp
->v_type 
== VCHR
) && vp
->v_specinfo 
!= 0) { 
2140                         if (*vp
->v_hashchain 
== vp
) { 
2141                                 *vp
->v_hashchain 
= vp
->v_specnext
; 
2143                                 for (vq 
= *vp
->v_hashchain
; vq
; vq 
= vq
->v_specnext
) { 
2144                                         if (vq
->v_specnext 
!= vp
) 
2146                                         vq
->v_specnext 
= vp
->v_specnext
; 
2150                                 panic("missing bdev"); 
2152                         if (vp
->v_flag 
& VALIASED
) { 
2154                                 for (vq 
= *vp
->v_hashchain
; vq
; vq 
= vq
->v_specnext
) { 
2155                                         if (vq
->v_rdev 
!= vp
->v_rdev 
|| 
2156                                         vq
->v_type 
!= vp
->v_type
) 
2163                                         panic("missing alias"); 
2165                                         vx
->v_flag 
&= ~VALIASED
; 
2166                                 vp
->v_flag 
&= ~VALIASED
; 
2170                         struct specinfo 
*tmp 
= vp
->v_specinfo
; 
2171                         vp
->v_specinfo 
= NULL
; 
2172                         FREE_ZONE((void *)tmp
, sizeof(struct specinfo
), M_SPECINFO
); 
2178  * Lookup a vnode by device number. 
2181 check_mountedon(dev_t dev
, enum vtype type
, int  *errorp
) 
2189         for (vp 
= speclisth
[SPECHASH(dev
)]; vp
; vp 
= vp
->v_specnext
) { 
2190                 if (dev 
!= vp
->v_rdev 
|| type 
!= vp
->v_type
) 
2194                 if (vnode_getwithvid(vp
,vid
)) 
2196                 vnode_lock_spin(vp
); 
2197                 if ((vp
->v_usecount 
> 0) || (vp
->v_iocount 
> 1)) { 
2199                         if ((*errorp 
= vfs_mountedon(vp
)) != 0) 
2211  * Calculate the total number of references to a special device. 
2221         if ((vp
->v_flag 
& VALIASED
) == 0) 
2222                 return (vp
->v_usecount 
- vp
->v_kusecount
); 
2227          * Grab first vnode and its vid. 
2229         vq 
= *vp
->v_hashchain
; 
2230         vid 
= vq 
? vq
->v_id 
: 0; 
2236                  * Attempt to get the vnode outside the SPECHASH lock. 
2238                 if (vnode_getwithvid(vq
, vid
)) { 
2243                 if (vq
->v_rdev 
== vp
->v_rdev 
&& vq
->v_type 
== vp
->v_type
) { 
2244                         if ((vq
->v_usecount 
== 0) && (vq
->v_iocount 
== 1)  && vq 
!= vp
) { 
2246                                  * Alias, but not in use, so flush it out. 
2248                                 vnode_reclaim_internal(vq
, 1, 1, 0); 
2249                                 vnode_put_locked(vq
); 
2253                         count 
+= (vq
->v_usecount 
- vq
->v_kusecount
); 
2259                  * must do this with the reference still held on 'vq' 
2260                  * so that it can't be destroyed while we're poking 
2261                  * through v_specnext 
2263                 vnext 
= vq
->v_specnext
; 
2264                 vid 
= vnext 
? vnext
->v_id 
: 0; 
2276 int     prtactive 
= 0;          /* 1 => print out reclaim of active vnodes */ 
2279  * Print out a description of a vnode. 
2281 #if !CONFIG_NO_PRINTF_STRINGS 
2282 static const char *typename
[] = 
2283    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; 
2287 vprint(const char *label
, struct vnode 
*vp
) 
2292                 printf("%s: ", label
); 
2293         printf("type %s, usecount %d, writecount %ld", 
2294                typename
[vp
->v_type
], vp
->v_usecount
, vp
->v_writecount
); 
2296         if (vp
->v_flag 
& VROOT
) 
2297                 strlcat(sbuf
, "|VROOT", sizeof(sbuf
)); 
2298         if (vp
->v_flag 
& VTEXT
) 
2299                 strlcat(sbuf
, "|VTEXT", sizeof(sbuf
)); 
2300         if (vp
->v_flag 
& VSYSTEM
) 
2301                 strlcat(sbuf
, "|VSYSTEM", sizeof(sbuf
)); 
2302         if (vp
->v_flag 
& VNOFLUSH
) 
2303                 strlcat(sbuf
, "|VNOFLUSH", sizeof(sbuf
)); 
2304         if (vp
->v_flag 
& VBWAIT
) 
2305                 strlcat(sbuf
, "|VBWAIT", sizeof(sbuf
)); 
2306         if (vp
->v_flag 
& VALIASED
) 
2307                 strlcat(sbuf
, "|VALIASED", sizeof(sbuf
)); 
2308         if (sbuf
[0] != '\0') 
2309                 printf(" flags (%s)", &sbuf
[1]); 
2314 vn_getpath(struct vnode 
*vp
, char *pathbuf
, int *len
) 
2316         return build_path(vp
, pathbuf
, *len
, len
, BUILDPATH_NO_FS_ENTER
, vfs_context_current()); 
2321 vn_getcdhash(struct vnode 
*vp
, off_t offset
, unsigned char *cdhash
) 
2323         return ubc_cs_getcdhash(vp
, offset
, cdhash
); 
/* package-extension table shared by the is_package_name machinery */
static char *extension_table=NULL;
static int   nexts;
static int   max_ext_width;

/*
 * qsort comparator ordering extensions by string length.
 * NOTE(review): this sorts ascending (shortest first), while the
 * comment on set_package_extensions_table claims longest-to-shortest;
 * behavior preserved as-is since lookup correctness doesn't depend on
 * the direction.
 */
static int
extension_cmp(const void *a, const void *b)
{
    return (strlen((const char *)a) - strlen((const char *)b));
}
2339 // This is the api LaunchServices uses to inform the kernel 
2340 // the list of package extensions to ignore. 
2342 // Internally we keep the list sorted by the length of the 
2343 // the extension (from longest to shortest).  We sort the 
2344 // list of extensions so that we can speed up our searches 
2345 // when comparing file names -- we only compare extensions 
2346 // that could possibly fit into the file name, not all of 
2347 // them (i.e. a short 8 character name can't have an 8 
2348 // character extension). 
2350 __private_extern__ 
int 
2351 set_package_extensions_table(void *data
, int nentries
, int maxwidth
) 
2356     if (nentries 
<= 0 || nentries 
> 1024 || maxwidth 
<= 0 || maxwidth 
> 255) { 
2360     MALLOC(new_exts
, char *, nentries 
* maxwidth
, M_TEMP
, M_WAITOK
); 
2362     error 
= copyin(CAST_USER_ADDR_T(data
), new_exts
, nentries 
* maxwidth
); 
2364         FREE(new_exts
, M_TEMP
); 
2368     if (extension_table
) { 
2369         FREE(extension_table
, M_TEMP
); 
2371     extension_table 
= new_exts
; 
2373     max_ext_width   
= maxwidth
; 
2375     qsort(extension_table
, nexts
, maxwidth
, extension_cmp
); 
2381 __private_extern__ 
int 
2382 is_package_name(const char *name
, int len
) 
2385     const char *ptr
, *name_ext
; 
2392     for(ptr
=name
; *ptr 
!= '\0'; ptr
++) { 
2398     // if there is no "." extension, it can't match 
2399     if (name_ext 
== NULL
) { 
2403     // advance over the "." 
2406     // now iterate over all the extensions to see if any match 
2407     ptr 
= &extension_table
[0]; 
2408     for(i
=0; i 
< nexts
; i
++, ptr
+=max_ext_width
) { 
2409         extlen 
= strlen(ptr
); 
2410         if (strncasecmp(name_ext
, ptr
, extlen
) == 0 && name_ext
[extlen
] == '\0') { 
2416     // if we get here, no extension matched 
2421 vn_path_package_check(__unused vnode_t vp
, char *path
, int pathlen
, int *component
) 
2432     while(end 
< path 
+ pathlen 
&& *end 
!= '\0') { 
2433         while(end 
< path 
+ pathlen 
&& *end 
== '/' && *end 
!= '\0') { 
2439         while(end 
< path 
+ pathlen 
&& *end 
!= '/' && *end 
!= '\0') { 
2443         if (end 
> path 
+ pathlen
) { 
2444             // hmm, string wasn't null terminated  
2449         if (is_package_name(ptr
, end 
- ptr
)) { 
2463  * Top level filesystem related information gathering. 
2465 extern unsigned int vfs_nummntops
; 
2468 vfs_sysctl(int *name
, u_int namelen
, user_addr_t oldp
, size_t *oldlenp
,  
2469            user_addr_t newp
, size_t newlen
, proc_t p
) 
2471         struct vfstable 
*vfsp
; 
2475         struct vfsconf 
*vfsc
; 
2477         /* All non VFS_GENERIC and in VFS_GENERIC,  
2478          * VFS_MAXTYPENUM, VFS_CONF, VFS_SET_PACKAGE_EXTS 
2479          * needs to have root priv to have modifiers.  
2480          * For rest the userland_sysctl(CTLFLAG_ANYBODY) would cover. 
2482         if ((newp 
!= USER_ADDR_NULL
) && ((name
[0] != VFS_GENERIC
) ||  
2483                         ((name
[1] == VFS_MAXTYPENUM
) || 
2484                          (name
[1] == VFS_CONF
) || 
2485                          (name
[1] == VFS_SET_PACKAGE_EXTS
))) 
2486              && (error 
= suser(kauth_cred_get(), &p
->p_acflag
))) { 
2490          * The VFS_NUMMNTOPS shouldn't be at name[0] since 
2491          * is a VFS generic variable. So now we must check 
2492          * namelen so we don't end up covering any UFS 
2493          * variables (sinc UFS vfc_typenum is 1). 
2495          * It should have been: 
2496          *    name[0]:  VFS_GENERIC 
2497          *    name[1]:  VFS_NUMMNTOPS 
2499         if (namelen 
== 1 && name
[0] == VFS_NUMMNTOPS
) { 
2500                 return (sysctl_rdint(oldp
, oldlenp
, newp
, vfs_nummntops
)); 
2503         /* all sysctl names at this level are at least name and field */ 
2505                 return (EISDIR
);                /* overloaded */ 
2506         if (name
[0] != VFS_GENERIC
) { 
2507                 for (vfsp 
= vfsconf
; vfsp
; vfsp 
= vfsp
->vfc_next
) 
2508                         if (vfsp
->vfc_typenum 
== name
[0]) 
2513                 /* XXX current context proxy for proc p? */ 
2514                 return ((*vfsp
->vfc_vfsops
->vfs_sysctl
)(&name
[1], namelen 
- 1, 
2515                             oldp
, oldlenp
, newp
, newlen
, 
2516                             vfs_context_current())); 
2519         case VFS_MAXTYPENUM
: 
2520                 return (sysctl_rdint(oldp
, oldlenp
, newp
, maxvfsconf
)); 
2523                         return (ENOTDIR
);       /* overloaded */ 
2524                 for (vfsp 
= vfsconf
; vfsp
; vfsp 
= vfsp
->vfc_next
) 
2525                         if (vfsp
->vfc_typenum 
== name
[2]) 
2529                 vfsc 
= (struct vfsconf 
*)vfsp
; 
2530                 if (proc_is64bit(p
)) { 
2531                     struct user_vfsconf  usr_vfsc
; 
2532                     usr_vfsc
.vfc_vfsops 
= CAST_USER_ADDR_T(vfsc
->vfc_vfsops
); 
2533                 bcopy(vfsc
->vfc_name
, usr_vfsc
.vfc_name
, sizeof(usr_vfsc
.vfc_name
)); 
2534                     usr_vfsc
.vfc_typenum 
= vfsc
->vfc_typenum
; 
2535                     usr_vfsc
.vfc_refcount 
= vfsc
->vfc_refcount
; 
2536                     usr_vfsc
.vfc_flags 
= vfsc
->vfc_flags
; 
2537                     usr_vfsc
.vfc_mountroot 
= CAST_USER_ADDR_T(vfsc
->vfc_mountroot
); 
2538                     usr_vfsc
.vfc_next 
= CAST_USER_ADDR_T(vfsc
->vfc_next
); 
2539             return (sysctl_rdstruct(oldp
, oldlenp
, newp
, &usr_vfsc
, 
2543             return (sysctl_rdstruct(oldp
, oldlenp
, newp
, vfsc
, 
2544                                     sizeof(struct vfsconf
))); 
2547         case VFS_SET_PACKAGE_EXTS
: 
2548                 return set_package_extensions_table((void *)name
[1], name
[2], name
[3]); 
2551          * We need to get back into the general MIB, so we need to re-prepend 
2552          * CTL_VFS to our name and try userland_sysctl(). 
2554         usernamelen 
= namelen 
+ 1; 
2555         MALLOC(username
, int *, usernamelen 
* sizeof(*username
), 
2557         bcopy(name
, username 
+ 1, namelen 
* sizeof(*name
)); 
2558         username
[0] = CTL_VFS
; 
2559         error 
= userland_sysctl(p
, username
, usernamelen
, oldp
,  
2560                                 oldlenp
, 1, newp
, newlen
, oldlenp
); 
2561         FREE(username
, M_TEMP
); 
2566  * Dump vnode list (via sysctl) - defunct 
2567  * use "pstat" instead 
2572 (__unused 
struct sysctl_oid 
*oidp
, __unused 
void *arg1
, __unused 
int arg2
, __unused 
struct sysctl_req 
*req
) 
2577 SYSCTL_PROC(_kern
, KERN_VNODE
, vnode
, 
2578                 CTLTYPE_STRUCT 
| CTLFLAG_RD 
| CTLFLAG_MASKED
, 
2579                 0, 0, sysctl_vnode
, "S,", ""); 
2583  * Check to see if a filesystem is mounted on a block device. 
2586 vfs_mountedon(struct vnode 
*vp
) 
2592         if (vp
->v_specflags 
& SI_MOUNTEDON
) { 
2596         if (vp
->v_flag 
& VALIASED
) { 
2597                 for (vq 
= *vp
->v_hashchain
; vq
; vq 
= vq
->v_specnext
) { 
2598                         if (vq
->v_rdev 
!= vp
->v_rdev 
|| 
2599                             vq
->v_type 
!= vp
->v_type
) 
2601                         if (vq
->v_specflags 
& SI_MOUNTEDON
) { 
2613  * Unmount all filesystems. The list is traversed in reverse order 
2614  * of mounting to avoid dependencies. 
2616 __private_extern__ 
void 
2617 vfs_unmountall(void) 
2623          * Since this only runs when rebooting, it is not interlocked. 
2626         while(!TAILQ_EMPTY(&mountlist
)) { 
2627                 mp 
= TAILQ_LAST(&mountlist
, mntlist
); 
2628                 mount_list_unlock(); 
2629                 error 
= dounmount(mp
, MNT_FORCE
, 0, vfs_context_current()); 
2630                 if ((error 
!= 0) && (error 
!= EBUSY
)) { 
2631                         printf("unmount of %s failed (", mp
->mnt_vfsstat
.f_mntonname
); 
2632                         printf("%d)\n", error
); 
2634                         TAILQ_REMOVE(&mountlist
, mp
, mnt_list
); 
2636                 } else if (error 
== EBUSY
) { 
2637                         /* If EBUSY is returned,  the unmount was already in progress */ 
2638                         printf("unmount of %x failed (", (unsigned int)mp
); 
2643         mount_list_unlock(); 
2648  * This routine is called from vnode_pager_deallocate out of the VM 
2649  * The path to vnode_pager_deallocate can only be initiated by ubc_destroy_named 
2650  * on a vnode that has a UBCINFO 
2652 __private_extern__ 
void 
2653 vnode_pager_vrele(vnode_t vp
) 
2655         struct ubc_info 
*uip
; 
2659         vp
->v_lflag 
&= ~VNAMED_UBC
; 
2661         uip 
= vp
->v_ubcinfo
; 
2662         vp
->v_ubcinfo 
= UBC_INFO_NULL
; 
2664         ubc_info_deallocate(uip
); 
2670 #include <sys/disk.h> 
2673 vfs_init_io_attributes(vnode_t devvp
, mount_t mp
) 
2677         off_t   writeblockcnt
; 
2688         vfs_context_t ctx 
= vfs_context_current(); 
2692          * determine if this mount point exists on the same device as the root 
2693          * partition... if so, then it comes under the hard throttle control 
2696         static int rootunit 
= -1; 
2698         if (rootunit 
== -1) { 
2699                 if (VNOP_IOCTL(rootvp
, DKIOCGETBSDUNIT
, (caddr_t
)&rootunit
, 0, ctx
)) 
2701                 else if (rootvp 
== devvp
) 
2702                         mp
->mnt_kern_flag 
|= MNTK_ROOTDEV
; 
2704         if (devvp 
!= rootvp 
&& rootunit 
!= -1) { 
2705                 if (VNOP_IOCTL(devvp
, DKIOCGETBSDUNIT
, (caddr_t
)&thisunit
, 0, ctx
) == 0) { 
2706                         if (thisunit 
== rootunit
) 
2707                                 mp
->mnt_kern_flag 
|= MNTK_ROOTDEV
; 
2711          * force the spec device to re-cache 
2712          * the underlying block size in case 
2713          * the filesystem overrode the initial value 
2715         set_fsblocksize(devvp
); 
2718         if ((error 
= VNOP_IOCTL(devvp
, DKIOCGETBLOCKSIZE
, 
2719                                 (caddr_t
)&blksize
, 0, ctx
))) 
2722         mp
->mnt_devblocksize 
= blksize
; 
2724         if (VNOP_IOCTL(devvp
, DKIOCISVIRTUAL
, (caddr_t
)&isvirtual
, 0, ctx
) == 0) { 
2726                         mp
->mnt_kern_flag 
|= MNTK_VIRTUALDEV
; 
2729         if ((error 
= VNOP_IOCTL(devvp
, DKIOCGETFEATURES
, 
2730                                 (caddr_t
)&features
, 0, ctx
))) 
2733         if ((error 
= VNOP_IOCTL(devvp
, DKIOCGETMAXBLOCKCOUNTREAD
, 
2734                                 (caddr_t
)&readblockcnt
, 0, ctx
))) 
2737         if ((error 
= VNOP_IOCTL(devvp
, DKIOCGETMAXBLOCKCOUNTWRITE
, 
2738                                 (caddr_t
)&writeblockcnt
, 0, ctx
))) 
2741         if ((error 
= VNOP_IOCTL(devvp
, DKIOCGETMAXBYTECOUNTREAD
, 
2742                                 (caddr_t
)&readmaxcnt
, 0, ctx
))) 
2745         if ((error 
= VNOP_IOCTL(devvp
, DKIOCGETMAXBYTECOUNTWRITE
, 
2746                                 (caddr_t
)&writemaxcnt
, 0, ctx
))) 
2749         if ((error 
= VNOP_IOCTL(devvp
, DKIOCGETMAXSEGMENTCOUNTREAD
, 
2750                                 (caddr_t
)&readsegcnt
, 0, ctx
))) 
2753         if ((error 
= VNOP_IOCTL(devvp
, DKIOCGETMAXSEGMENTCOUNTWRITE
, 
2754                                 (caddr_t
)&writesegcnt
, 0, ctx
))) 
2757         if ((error 
= VNOP_IOCTL(devvp
, DKIOCGETMAXSEGMENTBYTECOUNTREAD
, 
2758                                 (caddr_t
)&readsegsize
, 0, ctx
))) 
2761         if ((error 
= VNOP_IOCTL(devvp
, DKIOCGETMAXSEGMENTBYTECOUNTWRITE
, 
2762                                 (caddr_t
)&writesegsize
, 0, ctx
))) 
2765         if ((error 
= VNOP_IOCTL(devvp
, DKIOCGETMINSEGMENTALIGNMENTBYTECOUNT
, 
2766                                 (caddr_t
)&alignment
, 0, ctx
))) 
2770                 temp 
= (readmaxcnt 
> UINT32_MAX
) ? UINT32_MAX 
: readmaxcnt
; 
2773                         temp 
= readblockcnt 
* blksize
; 
2774                         temp 
= (temp 
> UINT32_MAX
) ? UINT32_MAX 
: temp
; 
2778         mp
->mnt_maxreadcnt 
= (u_int32_t
)temp
; 
2781                 temp 
= (writemaxcnt 
> UINT32_MAX
) ? UINT32_MAX 
: writemaxcnt
; 
2783                 if (writeblockcnt
) { 
2784                         temp 
= writeblockcnt 
* blksize
; 
2785                         temp 
= (temp 
> UINT32_MAX
) ? UINT32_MAX 
: temp
; 
2789         mp
->mnt_maxwritecnt 
= (u_int32_t
)temp
; 
2792                 temp 
= (readsegcnt 
> UINT16_MAX
) ? UINT16_MAX 
: readsegcnt
; 
2793                 mp
->mnt_segreadcnt 
= (u_int16_t
)temp
; 
2796                 temp 
= (writesegcnt 
> UINT16_MAX
) ? UINT16_MAX 
: writesegcnt
; 
2797                 mp
->mnt_segwritecnt 
= (u_int16_t
)temp
; 
2800                 temp 
= (readsegsize 
> UINT32_MAX
) ? UINT32_MAX 
: readsegsize
; 
2802                 temp 
= mp
->mnt_maxreadcnt
; 
2803         mp
->mnt_maxsegreadsize 
= (u_int32_t
)temp
; 
2806                 temp 
= (writesegsize 
> UINT32_MAX
) ? UINT32_MAX 
: writesegsize
; 
2808                 temp 
= mp
->mnt_maxwritecnt
; 
2809         mp
->mnt_maxsegwritesize 
= (u_int32_t
)temp
; 
2812                 temp 
= (alignment 
> PAGE_SIZE
) ? PAGE_MASK 
: alignment 
- 1; 
2815         mp
->mnt_alignmentmask 
= temp
; 
2817         if (features 
& DK_FEATURE_FORCE_UNIT_ACCESS
) 
2818                 mp
->mnt_ioflags 
|= MNT_IOFLAGS_FUA_SUPPORTED
; 
2823 static struct klist fs_klist
; 
2824 lck_grp_t 
*fs_klist_lck_grp
; 
2825 lck_mtx_t 
*fs_klist_lock
; 
2828 vfs_event_init(void) 
2830         klist_init(&fs_klist
); 
2831         fs_klist_lck_grp 
= lck_grp_alloc_init("fs_klist", NULL
); 
2832         fs_klist_lock 
= lck_mtx_alloc_init(fs_klist_lck_grp
, NULL
); 
2836 vfs_event_signal(__unused fsid_t 
*fsid
, u_int32_t event
, __unused 
intptr_t data
) 
2838         lck_mtx_lock(fs_klist_lock
); 
2839         KNOTE(&fs_klist
, event
); 
2840         lck_mtx_unlock(fs_klist_lock
); 
/*
 * return the number of mounted filesystems.
 */
static int
sysctl_vfs_getvfscnt(void)
{
	return(mount_getvfscnt());
}
2854 mount_getvfscnt(void) 
2860         mount_list_unlock(); 
2868 mount_fillfsids(fsid_t 
*fsidlst
, int count
) 
2875         TAILQ_FOREACH(mp
, &mountlist
, mnt_list
) { 
2876                 if (actual 
<= count
) { 
2877                         fsidlst
[actual
] = mp
->mnt_vfsstat
.f_fsid
; 
2881         mount_list_unlock(); 
2887  * fill in the array of fsid_t's up to a max of 'count', the actual 
2888  * number filled in will be set in '*actual'.  If there are more fsid_t's 
2889  * than room in fsidlst then ENOMEM will be returned and '*actual' will 
2890  * have the actual count. 
2891  * having *actual filled out even in the error case is depended upon. 
2894 sysctl_vfs_getvfslist(fsid_t 
*fsidlst
, int count
, int *actual
) 
2900         TAILQ_FOREACH(mp
, &mountlist
, mnt_list
) { 
2902                 if (*actual 
<= count
) 
2903                         fsidlst
[(*actual
) - 1] = mp
->mnt_vfsstat
.f_fsid
; 
2905         mount_list_unlock(); 
2906         return (*actual 
<= count 
? 0 : ENOMEM
); 
2910 sysctl_vfs_vfslist(__unused 
struct sysctl_oid 
*oidp
, __unused 
void *arg1
, 
2911                 __unused 
int arg2
, struct sysctl_req 
*req
) 
2917         /* This is a readonly node. */ 
2918         if (req
->newptr 
!= USER_ADDR_NULL
) 
2921         /* they are querying us so just return the space required. */ 
2922         if (req
->oldptr 
== USER_ADDR_NULL
) { 
2923                 req
->oldidx 
= sysctl_vfs_getvfscnt() * sizeof(fsid_t
); 
2928          * Retrieve an accurate count of the amount of space required to copy 
2929          * out all the fsids in the system. 
2931         space 
= req
->oldlen
; 
2932         req
->oldlen 
= sysctl_vfs_getvfscnt() * sizeof(fsid_t
); 
2934         /* they didn't give us enough space. */ 
2935         if (space 
< req
->oldlen
) 
2938         MALLOC(fsidlst
, fsid_t 
*, req
->oldlen
, M_TEMP
, M_WAITOK
); 
2939         error 
= sysctl_vfs_getvfslist(fsidlst
, req
->oldlen 
/ sizeof(fsid_t
), 
2942          * If we get back ENOMEM, then another mount has been added while we 
2943          * slept in malloc above.  If this is the case then try again. 
2945         if (error 
== ENOMEM
) { 
2946                 FREE(fsidlst
, M_TEMP
); 
2947                 req
->oldlen 
= space
; 
2951                 error 
= SYSCTL_OUT(req
, fsidlst
, actual 
* sizeof(fsid_t
)); 
2953         FREE(fsidlst
, M_TEMP
); 
2958  * Do a sysctl by fsid. 
2961 sysctl_vfs_ctlbyfsid(__unused 
struct sysctl_oid 
*oidp
, void *arg1
, int arg2
, 
2962                 struct sysctl_req 
*req
) 
2965         struct user_vfsidctl user_vc
; 
2967         struct vfsstatfs 
*sp
; 
2968         int *name
, flags
, namelen
; 
2969         int error
=0, gotref
=0; 
2970         vfs_context_t ctx 
= vfs_context_current(); 
2971         proc_t p 
= req
->p
;      /* XXX req->p != current_proc()? */ 
2972         boolean_t is_64_bit
; 
2976         is_64_bit 
= proc_is64bit(p
); 
2979                 error 
= SYSCTL_IN(req
, &user_vc
, sizeof(user_vc
)); 
2982                 if (user_vc
.vc_vers 
!= VFS_CTL_VERS1
) { 
2986                 mp 
= mount_list_lookupby_fsid(&user_vc
.vc_fsid
, 0, 1); 
2989                 error 
= SYSCTL_IN(req
, &vc
, sizeof(vc
)); 
2992                 if (vc
.vc_vers 
!= VFS_CTL_VERS1
) { 
2996                 mp 
= mount_list_lookupby_fsid(&vc
.vc_fsid
, 0, 1); 
3003         /* reset so that the fs specific code can fetch it. */ 
3006          * Note if this is a VFS_CTL then we pass the actual sysctl req 
3007          * in for "oldp" so that the lower layer can DTRT and use the 
3008          * SYSCTL_IN/OUT routines. 
3010         if (mp
->mnt_op
->vfs_sysctl 
!= NULL
) { 
3012                         if (vfs_64bitready(mp
)) { 
3013                                 error 
= mp
->mnt_op
->vfs_sysctl(name
, namelen
, 
3014                                     CAST_USER_ADDR_T(req
), 
3015                                     NULL
, USER_ADDR_NULL
, 0,  
3023                         error 
= mp
->mnt_op
->vfs_sysctl(name
, namelen
, 
3024                             CAST_USER_ADDR_T(req
), 
3025                             NULL
, USER_ADDR_NULL
, 0,  
3028                 if (error 
!= ENOTSUP
) { 
3033         case VFS_CTL_UMOUNT
: 
3036                         req
->newptr 
= user_vc
.vc_ptr
; 
3037                         req
->newlen 
= (size_t)user_vc
.vc_len
; 
3040                         req
->newptr 
= CAST_USER_ADDR_T(vc
.vc_ptr
); 
3041                         req
->newlen 
= vc
.vc_len
; 
3043                 error 
= SYSCTL_IN(req
, &flags
, sizeof(flags
)); 
3050                 /* safedounmount consumes a ref */ 
3051                 error 
= safedounmount(mp
, flags
, ctx
); 
3053         case VFS_CTL_STATFS
: 
3056                         req
->newptr 
= user_vc
.vc_ptr
; 
3057                         req
->newlen 
= (size_t)user_vc
.vc_len
; 
3060                         req
->newptr 
= CAST_USER_ADDR_T(vc
.vc_ptr
); 
3061                         req
->newlen 
= vc
.vc_len
; 
3063                 error 
= SYSCTL_IN(req
, &flags
, sizeof(flags
)); 
3066                 sp 
= &mp
->mnt_vfsstat
; 
3067                 if (((flags 
& MNT_NOWAIT
) == 0 || (flags 
& MNT_WAIT
)) && 
3068                     (error 
= vfs_update_vfsstat(mp
, ctx
, VFS_USER_EVENT
))) 
3071                         struct user_statfs sfs
; 
3072                         bzero(&sfs
, sizeof(sfs
)); 
3073                         sfs
.f_flags 
= mp
->mnt_flag 
& MNT_VISFLAGMASK
; 
3074                         sfs
.f_type 
= mp
->mnt_vtable
->vfc_typenum
; 
3075                         sfs
.f_bsize 
= (user_long_t
)sp
->f_bsize
; 
3076                         sfs
.f_iosize 
= (user_long_t
)sp
->f_iosize
; 
3077                         sfs
.f_blocks 
= (user_long_t
)sp
->f_blocks
; 
3078                         sfs
.f_bfree 
= (user_long_t
)sp
->f_bfree
; 
3079                         sfs
.f_bavail 
= (user_long_t
)sp
->f_bavail
; 
3080                         sfs
.f_files 
= (user_long_t
)sp
->f_files
; 
3081                         sfs
.f_ffree 
= (user_long_t
)sp
->f_ffree
; 
3082                         sfs
.f_fsid 
= sp
->f_fsid
; 
3083                         sfs
.f_owner 
= sp
->f_owner
; 
3085                         strlcpy(sfs
.f_fstypename
, sp
->f_fstypename
, MFSNAMELEN
); 
3086                         strlcpy(sfs
.f_mntonname
, sp
->f_mntonname
, MNAMELEN
); 
3087                         strlcpy(sfs
.f_mntfromname
, sp
->f_mntfromname
, MNAMELEN
); 
3089                         error 
= SYSCTL_OUT(req
, &sfs
, sizeof(sfs
)); 
3093                         bzero(&sfs
, sizeof(struct statfs
)); 
3094                         sfs
.f_flags 
= mp
->mnt_flag 
& MNT_VISFLAGMASK
; 
3095                         sfs
.f_type 
= mp
->mnt_vtable
->vfc_typenum
; 
3098                          * It's possible for there to be more than 2^^31 blocks in the filesystem, so we 
3099                          * have to fudge the numbers here in that case.   We inflate the blocksize in order 
3100                          * to reflect the filesystem size as best we can. 
3102                         if (sp
->f_blocks 
> LONG_MAX
) { 
3106                                  * Work out how far we have to shift the block count down to make it fit. 
3107                                  * Note that it's possible to have to shift so far that the resulting 
3108                                  * blocksize would be unreportably large.  At that point, we will clip 
3109                                  * any values that don't fit. 
3111                                  * For safety's sake, we also ensure that f_iosize is never reported as 
3112                                  * being smaller than f_bsize. 
3114                                 for (shift 
= 0; shift 
< 32; shift
++) { 
3115                                         if ((sp
->f_blocks 
>> shift
) <= LONG_MAX
) 
3117                                         if ((sp
->f_bsize 
<< (shift 
+ 1)) > LONG_MAX
) 
3120 #define __SHIFT_OR_CLIP(x, s)   ((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s))) 
3121                                 sfs
.f_blocks 
= (long)__SHIFT_OR_CLIP(sp
->f_blocks
, shift
); 
3122                                 sfs
.f_bfree 
= (long)__SHIFT_OR_CLIP(sp
->f_bfree
, shift
); 
3123                                 sfs
.f_bavail 
= (long)__SHIFT_OR_CLIP(sp
->f_bavail
, shift
); 
3124 #undef __SHIFT_OR_CLIP 
3125                                 sfs
.f_bsize 
= (long)(sp
->f_bsize 
<< shift
); 
3126                                 sfs
.f_iosize 
= lmax(sp
->f_iosize
, sp
->f_bsize
); 
3128                                 sfs
.f_bsize 
= (long)sp
->f_bsize
; 
3129                                 sfs
.f_iosize 
= (long)sp
->f_iosize
; 
3130                                 sfs
.f_blocks 
= (long)sp
->f_blocks
; 
3131                                 sfs
.f_bfree 
= (long)sp
->f_bfree
; 
3132                                 sfs
.f_bavail 
= (long)sp
->f_bavail
; 
3134                         sfs
.f_files 
= (long)sp
->f_files
; 
3135                         sfs
.f_ffree 
= (long)sp
->f_ffree
; 
3136                         sfs
.f_fsid 
= sp
->f_fsid
; 
3137                         sfs
.f_owner 
= sp
->f_owner
; 
3139                         strlcpy(sfs
.f_fstypename
, sp
->f_fstypename
, MFSNAMELEN
); 
3140                         strlcpy(sfs
.f_mntonname
, sp
->f_mntonname
, MNAMELEN
); 
3141                         strlcpy(sfs
.f_mntfromname
, sp
->f_mntfromname
, MNAMELEN
); 
3143                         error 
= SYSCTL_OUT(req
, &sfs
, sizeof(sfs
)); 
3156 static int      filt_fsattach(struct knote 
*kn
); 
3157 static void     filt_fsdetach(struct knote 
*kn
); 
3158 static int      filt_fsevent(struct knote 
*kn
, long hint
); 
3160 struct filterops fs_filtops 
= 
3161         { 0, filt_fsattach
, filt_fsdetach
, filt_fsevent 
}; 
3164 filt_fsattach(struct knote 
*kn
) 
3167         lck_mtx_lock(fs_klist_lock
); 
3168         kn
->kn_flags 
|= EV_CLEAR
; 
3169         KNOTE_ATTACH(&fs_klist
, kn
); 
3170         lck_mtx_unlock(fs_klist_lock
); 
3175 filt_fsdetach(struct knote 
*kn
) 
3177         lck_mtx_lock(fs_klist_lock
); 
3178         KNOTE_DETACH(&fs_klist
, kn
); 
3179         lck_mtx_unlock(fs_klist_lock
); 
3183 filt_fsevent(struct knote 
*kn
, long hint
) 
3186          * Backwards compatibility: 
3187          * Other filters would do nothing if kn->kn_sfflags == 0 
3190         if ((kn
->kn_sfflags 
== 0) || (kn
->kn_sfflags 
& hint
)) { 
3191                 kn
->kn_fflags 
|= hint
; 
3194         return (kn
->kn_fflags 
!= 0); 
3198 sysctl_vfs_noremotehang(__unused 
struct sysctl_oid 
*oidp
, 
3199                 __unused 
void *arg1
, __unused 
int arg2
, struct sysctl_req 
*req
) 
3205         /* We need a pid. */ 
3206         if (req
->newptr 
== USER_ADDR_NULL
) 
3209         error 
= SYSCTL_IN(req
, &pid
, sizeof(pid
)); 
3213         p 
= proc_find(pid 
< 0 ? -pid 
: pid
); 
3218          * Fetching the value is ok, but we only fetch if the old 
3221         if (req
->oldptr 
!= USER_ADDR_NULL
) { 
3222                 out 
= !((p
->p_flag 
& P_NOREMOTEHANG
) == 0); 
3224                 error 
= SYSCTL_OUT(req
, &out
, sizeof(out
)); 
3228         /* cansignal offers us enough security. */ 
3229         if (p 
!= req
->p 
&& proc_suser(req
->p
) != 0) { 
3235                 OSBitAndAtomic(~((uint32_t)P_NOREMOTEHANG
), (UInt32 
*)&p
->p_flag
); 
3237                 OSBitOrAtomic(P_NOREMOTEHANG
, (UInt32 
*)&p
->p_flag
); 
3243 /* the vfs.generic. branch. */ 
3244 SYSCTL_NODE(_vfs
, VFS_GENERIC
, generic
, CTLFLAG_RW
|CTLFLAG_LOCKED
, NULL
, "vfs generic hinge"); 
3245 /* retreive a list of mounted filesystem fsid_t */ 
3246 SYSCTL_PROC(_vfs_generic
, OID_AUTO
, vfsidlist
, CTLFLAG_RD
, 
3247     NULL
, 0, sysctl_vfs_vfslist
, "S,fsid", "List of mounted filesystem ids"); 
3248 /* perform operations on filesystem via fsid_t */ 
3249 SYSCTL_NODE(_vfs_generic
, OID_AUTO
, ctlbyfsid
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 
3250     sysctl_vfs_ctlbyfsid
, "ctlbyfsid"); 
3251 SYSCTL_PROC(_vfs_generic
, OID_AUTO
, noremotehang
, CTLFLAG_RW
|CTLFLAG_ANYBODY
, 
3252     NULL
, 0, sysctl_vfs_noremotehang
, "I", "noremotehang"); 
/* count of vnodes recycled by new_vnode() */
long num_reusedvnodes = 0;	/* long for OSAddAtomic */
3258 new_vnode(vnode_t 
*vpp
) 
3261         int retries 
= 0;                                /* retry incase of tablefull */ 
3262         int force_alloc 
= 0, walk_count 
= 0; 
3265         struct timeval current_tv
; 
3266         struct unsafe_fsnode 
*l_unsafefs 
= 0; 
3267         proc_t  curproc 
= current_proc(); 
3268         pid_t current_pid 
= proc_pid(curproc
); 
3271         microuptime(¤t_tv
); 
3277         if ( !TAILQ_EMPTY(&vnode_dead_list
)) { 
3279                  * Can always reuse a dead one 
3281                 vp 
= TAILQ_FIRST(&vnode_dead_list
); 
3285          * no dead vnodes available... if we're under 
3286          * the limit, we'll create a new vnode 
3288         if (numvnodes 
< desiredvnodes 
|| force_alloc
) { 
3290                 vnode_list_unlock(); 
3291                 MALLOC_ZONE(vp
, struct vnode 
*, sizeof(*vp
), M_VNODE
, M_WAITOK
); 
3292                 bzero((char *)vp
, sizeof(*vp
)); 
3293                 VLISTNONE(vp
);          /* avoid double queue removal */ 
3294                 lck_mtx_init(&vp
->v_lock
, vnode_lck_grp
, vnode_lck_attr
); 
3297                 vp
->v_id 
= ts
.tv_nsec
; 
3298                 vp
->v_flag 
= VSTANDARD
; 
3301                 mac_vnode_label_init(vp
); 
3308 #define MAX_WALK_COUNT 1000 
3310         if ( !TAILQ_EMPTY(&vnode_rage_list
) && 
3311              (ragevnodes 
>= rage_limit 
|| 
3312               (current_tv
.tv_sec 
- rage_tv
.tv_sec
) >= RAGE_TIME_LIMIT
)) { 
3314                 TAILQ_FOREACH(vp
, &vnode_rage_list
, v_freelist
) { 
3315                     if ( !(vp
->v_listflag 
& VLIST_RAGE
) || !(vp
->v_flag 
& VRAGE
)) 
3316                         panic("new_vnode: vp on RAGE list not marked both VLIST_RAGE and VRAGE"); 
3318                     // skip vnodes which have a dependency on this process 
3319                     // (i.e. they're vnodes in a disk image and this process 
3320                     // is diskimages-helper) 
3322                     if (vp
->v_mount 
&& vp
->v_mount
->mnt_dependent_pid 
!= current_pid 
&& vp
->v_mount
->mnt_dependent_process 
!= curproc
) { 
3326                     // don't iterate more than MAX_WALK_COUNT vnodes to 
3327                     // avoid keeping the vnode list lock held for too long. 
3328                     if (walk_count
++ > MAX_WALK_COUNT
) { 
3336         if (vp 
== NULL 
&& !TAILQ_EMPTY(&vnode_free_list
)) { 
3338                  * Pick the first vp for possible reuse 
3341                 TAILQ_FOREACH(vp
, &vnode_free_list
, v_freelist
) { 
3342                     // skip vnodes which have a dependency on this process 
3343                     // (i.e. they're vnodes in a disk image and this process 
3344                     // is diskimages-helper) 
3346                     if (vp
->v_mount 
&& vp
->v_mount
->mnt_dependent_pid 
!= current_pid 
&& vp
->v_mount
->mnt_dependent_process 
!= curproc
) { 
3350                     // don't iterate more than MAX_WALK_COUNT vnodes to 
3351                     // avoid keeping the vnode list lock held for too long. 
3352                     if (walk_count
++ > MAX_WALK_COUNT
) { 
3361         // if we don't have a vnode and the walk_count is >= MAX_WALK_COUNT 
3362         // then we're trying to create a vnode on behalf of a 
3363         // process like diskimages-helper that has file systems 
3364         // mounted on top of itself (and thus we can't reclaim 
3365         // vnodes in the file systems on top of us).  if we can't 
3366         // find a vnode to reclaim then we'll just have to force 
3369         if (vp 
== NULL 
&& walk_count 
>= MAX_WALK_COUNT
) { 
3371             vnode_list_unlock(); 
3377                  * we've reached the system imposed maximum number of vnodes 
3378                  * but there isn't a single one available 
3379                  * wait a bit and then retry... if we can't get a vnode 
3380                  * after 100 retries, than log a complaint 
3382                 if (++retries 
<= 100) { 
3383                         vnode_list_unlock(); 
3384                         delay_for_interval(1, 1000 * 1000); 
3388                 vnode_list_unlock(); 
3390                 log(LOG_EMERG
, "%d desired, %d numvnodes, " 
3391                         "%d free, %d dead, %d rage\n", 
3392                         desiredvnodes
, numvnodes
, freevnodes
, deadvnodes
, ragevnodes
); 
3399         vnode_list_remove_locked(vp
); 
3401         vnode_list_unlock(); 
3402         vnode_lock_spin(vp
); 
3405          * We could wait for the vnode_lock after removing the vp from the freelist 
3406          * and the vid is bumped only at the very end of reclaim. So it is  possible 
3407          * that we are looking at a vnode that is being terminated. If so skip it. 
3409         if ((vpid 
!= vp
->v_id
) || (vp
->v_usecount 
!= 0) || (vp
->v_iocount 
!= 0) ||  
3410                         VONLIST(vp
) || (vp
->v_lflag 
& VL_TERMINATE
)) { 
3412                  * we lost the race between dropping the list lock 
3413                  * and picking up the vnode_lock... someone else 
3414                  * used this vnode and it is now in a new state 
3415                  * so we need to go back and try again 
3420         if ( (vp
->v_lflag 
& (VL_NEEDINACTIVE 
| VL_MARKTERM
)) == VL_NEEDINACTIVE 
) { 
3422                  * we did a vnode_rele_ext that asked for 
3423                  * us not to reenter the filesystem during 
3424                  * the release even though VL_NEEDINACTIVE was 
3425                  * set... we'll do it here by doing a 
3426                  * vnode_get/vnode_put 
3428                  * pick up an iocount so that we can call 
3429                  * vnode_put and drive the VNOP_INACTIVE... 
3430                  * vnode_put will either leave us off  
3431                  * the freelist if a new ref comes in, 
3432                  * or put us back on the end of the freelist 
3433                  * or recycle us if we were marked for termination... 
3434                  * so we'll just go grab a new candidate 
3440                 vnode_put_locked(vp
); 
3444         OSAddAtomic(1, &num_reusedvnodes
); 
3446         /* Checks for anyone racing us for recycle */  
3447         if (vp
->v_type 
!= VBAD
) { 
3448                 if (vp
->v_lflag 
& VL_DEAD
) 
3449                         panic("new_vnode: the vnode is VL_DEAD but not VBAD"); 
3450                 vnode_lock_convert(vp
); 
3451                 (void)vnode_reclaim_internal(vp
, 1, 1, 0); 
3454                         panic("new_vnode: vp on list "); 
3455                 if (vp
->v_usecount 
|| vp
->v_iocount 
|| vp
->v_kusecount 
|| 
3456                     (vp
->v_lflag 
& (VNAMED_UBC 
| VNAMED_MOUNT 
| VNAMED_FSHASH
))) 
3457                         panic("new_vnode: free vnode still referenced\n"); 
3458                 if ((vp
->v_mntvnodes
.tqe_prev 
!= 0) && (vp
->v_mntvnodes
.tqe_next 
!= 0)) 
3459                         panic("new_vnode: vnode seems to be on mount list "); 
3460                 if ( !LIST_EMPTY(&vp
->v_nclinks
) || !LIST_EMPTY(&vp
->v_ncchildren
)) 
3461                         panic("new_vnode: vnode still hooked into the name cache"); 
3463         if (vp
->v_unsafefs
) { 
3464                 l_unsafefs 
= vp
->v_unsafefs
; 
3465                 vp
->v_unsafefs 
= (struct unsafe_fsnode 
*)NULL
; 
3470          * We should never see VL_LABELWAIT or VL_LABEL here. 
3471          * as those operations hold a reference. 
3473         assert ((vp
->v_lflag 
& VL_LABELWAIT
) != VL_LABELWAIT
); 
3474         assert ((vp
->v_lflag 
& VL_LABEL
) != VL_LABEL
); 
3475         if (vp
->v_lflag 
& VL_LABELED
) { 
3476                 vnode_lock_convert(vp
); 
3477                 mac_vnode_label_recycle(vp
); 
3483         vp
->v_writecount 
= 0; 
3484         vp
->v_references 
= 0; 
3485         vp
->v_iterblkflags 
= 0; 
3486         vp
->v_flag 
= VSTANDARD
; 
3487         /* vbad vnodes can point to dead_mountp */ 
3489         vp
->v_defer_reclaimlist 
= (vnode_t
)0; 
3494                 lck_mtx_destroy(&l_unsafefs
->fsnodelock
, vnode_lck_grp
); 
3495                 FREE_ZONE((void *)l_unsafefs
, sizeof(struct unsafe_fsnode
), M_UNSAFEFS
); 
3504 vnode_lock(vnode_t vp
) 
3506         lck_mtx_lock(&vp
->v_lock
); 
3510 vnode_lock_spin(vnode_t vp
) 
3512         lck_mtx_lock_spin(&vp
->v_lock
); 
3516 vnode_unlock(vnode_t vp
) 
3518         lck_mtx_unlock(&vp
->v_lock
); 
3524 vnode_get(struct vnode 
*vp
) 
3528         vnode_lock_spin(vp
); 
3529         retval 
= vnode_get_locked(vp
); 
3536 vnode_get_locked(struct vnode 
*vp
) 
3539         if ((vp
->v_iocount 
== 0) && (vp
->v_lflag 
& (VL_TERMINATE 
| VL_DEAD
))) { 
3550 vnode_getwithvid(vnode_t vp
, int vid
) 
3552         return(vget_internal(vp
, vid
, ( VNODE_NODEAD
| VNODE_WITHID
))); 
3556 vnode_getwithref(vnode_t vp
) 
3558         return(vget_internal(vp
, 0, 0)); 
3563 vnode_put(vnode_t vp
) 
3567         vnode_lock_spin(vp
); 
3568         retval 
= vnode_put_locked(vp
); 
3575 vnode_put_locked(vnode_t vp
) 
3577         vfs_context_t ctx 
= vfs_context_current();      /* hoist outside loop */ 
3580         if (vp
->v_iocount 
< 1)  
3581                 panic("vnode_put(%p): iocount < 1", vp
); 
3583         if ((vp
->v_usecount 
> 0) || (vp
->v_iocount 
> 1))  { 
3584                 vnode_dropiocount(vp
); 
3587         if ((vp
->v_lflag 
& (VL_MARKTERM 
| VL_TERMINATE 
| VL_DEAD 
| VL_NEEDINACTIVE
)) == VL_NEEDINACTIVE
) { 
3589                 vp
->v_lflag 
&= ~VL_NEEDINACTIVE
; 
3592                 VNOP_INACTIVE(vp
, ctx
); 
3594                 vnode_lock_spin(vp
); 
3596                  * because we had to drop the vnode lock before calling 
3597                  * VNOP_INACTIVE, the state of this vnode may have changed... 
3598                  * we may pick up both VL_MARTERM and either 
3599                  * an iocount or a usecount while in the VNOP_INACTIVE call 
3600                  * we don't want to call vnode_reclaim_internal on a vnode 
3601                  * that has active references on it... so loop back around 
3602                  * and reevaluate the state 
3606         vp
->v_lflag 
&= ~VL_NEEDINACTIVE
; 
3608         if ((vp
->v_lflag 
& (VL_MARKTERM 
| VL_TERMINATE 
| VL_DEAD
)) == VL_MARKTERM
) { 
3609                 vnode_lock_convert(vp
); 
3610                 vnode_reclaim_internal(vp
, 1, 1, 0); 
3612         vnode_dropiocount(vp
); 
3618 /* is vnode_t in use by others?  */ 
3620 vnode_isinuse(vnode_t vp
, int refcnt
) 
3622         return(vnode_isinuse_locked(vp
, refcnt
, 0)); 
3627 vnode_isinuse_locked(vnode_t vp
, int refcnt
, int locked
) 
3632                 vnode_lock_spin(vp
); 
3633         if ((vp
->v_type 
!= VREG
) && ((vp
->v_usecount 
- vp
->v_kusecount
) >  refcnt
)) { 
3637         if (vp
->v_type 
== VREG
)  { 
3638                 retval 
= ubc_isinuse_locked(vp
, refcnt
, 1); 
3648 /* resume vnode_t */ 
3650 vnode_resume(vnode_t vp
) 
3653         vnode_lock_spin(vp
); 
3655         if (vp
->v_owner 
== current_thread()) { 
3656                 vp
->v_lflag 
&= ~VL_SUSPENDED
; 
3659                 wakeup(&vp
->v_iocount
); 
3667  * Please do not use on more than one vnode at a time as it may 
3669  * xxx should we explicity prevent this from happening? 
3673 vnode_suspend(vnode_t vp
) 
3675         if (vp
->v_lflag 
& VL_SUSPENDED
) { 
3679         vnode_lock_spin(vp
); 
3682          * xxx is this sufficient to check if a vnode_drain is  
3686         if (vp
->v_owner 
== NULL
) { 
3687                 vp
->v_lflag 
|= VL_SUSPENDED
; 
3688                 vp
->v_owner 
= current_thread(); 
3698 vnode_drain(vnode_t vp
) 
3701         if (vp
->v_lflag 
& VL_DRAIN
) { 
3702                 panic("vnode_drain: recursuve drain"); 
3705         vp
->v_lflag 
|= VL_DRAIN
; 
3706         vp
->v_owner 
= current_thread(); 
3708         while (vp
->v_iocount 
> 1) 
3709                 msleep(&vp
->v_iocount
, &vp
->v_lock
, PVFS
, "vnode_drain", NULL
); 
3715  * if the number of recent references via vnode_getwithvid or vnode_getwithref 
3716  * exceeds this threshhold, than 'UN-AGE' the vnode by removing it from 
3717  * the LRU list if it's currently on it... once the iocount and usecount both drop 
3718  * to 0, it will get put back on the end of the list, effectively making it younger 
3719  * this allows us to keep actively referenced vnodes in the list without having 
3720  * to constantly remove and add to the list each time a vnode w/o a usecount is 
3721  * referenced which costs us taking and dropping a global lock twice. 
3723 #define UNAGE_THRESHHOLD        25 
3726 vnode_getiocount(vnode_t vp
, int vid
, int vflags
) 
3728         int nodead 
= vflags 
& VNODE_NODEAD
; 
3729         int nosusp 
= vflags 
& VNODE_NOSUSPEND
; 
3733                  * if it is a dead vnode with deadfs 
3735                 if (nodead 
&& (vp
->v_lflag 
& VL_DEAD
) && ((vp
->v_type 
== VBAD
) || (vp
->v_data 
== 0))) { 
3739                  * will return VL_DEAD ones 
3741                 if ((vp
->v_lflag 
& (VL_SUSPENDED 
| VL_DRAIN 
| VL_TERMINATE
)) == 0 ) { 
3745                  * if suspended vnodes are to be failed 
3747                 if (nosusp 
&& (vp
->v_lflag 
& VL_SUSPENDED
)) { 
3751                  * if you are the owner of drain/suspend/termination , can acquire iocount 
3752                  * check for VL_TERMINATE; it does not set owner 
3754                 if ((vp
->v_lflag 
& (VL_DRAIN 
| VL_SUSPENDED 
| VL_TERMINATE
)) && 
3755                     (vp
->v_owner 
== current_thread())) { 
3758                 vnode_lock_convert(vp
); 
3760                 if (vp
->v_lflag 
& VL_TERMINATE
) { 
3761                         vp
->v_lflag 
|= VL_TERMWANT
; 
3763                         msleep(&vp
->v_lflag
,   &vp
->v_lock
, PVFS
, "vnode getiocount", NULL
); 
3765                         msleep(&vp
->v_iocount
, &vp
->v_lock
, PVFS
, "vnode_getiocount", NULL
); 
3767         if (vid 
!= vp
->v_id
) { 
3770         if (++vp
->v_references 
>= UNAGE_THRESHHOLD
) { 
3771                 vp
->v_references 
= 0; 
3772                 vnode_list_remove(vp
); 
3782 vnode_dropiocount (vnode_t vp
) 
3784         if (vp
->v_iocount 
< 1) 
3785                 panic("vnode_dropiocount(%p): v_iocount < 1", vp
); 
3791         if ((vp
->v_lflag 
& (VL_DRAIN 
| VL_SUSPENDED
)) && (vp
->v_iocount 
<= 1)) { 
3792                 vnode_lock_convert(vp
); 
3793                 wakeup(&vp
->v_iocount
); 
3799 vnode_reclaim(struct vnode 
* vp
) 
3801         vnode_reclaim_internal(vp
, 0, 0, 0); 
3806 vnode_reclaim_internal(struct vnode 
* vp
, int locked
, int reuse
, int flags
) 
3813         if (vp
->v_lflag 
& VL_TERMINATE
) { 
3814                 panic("vnode reclaim in progress"); 
3816         vp
->v_lflag 
|= VL_TERMINATE
; 
3818         vn_clearunionwait(vp
, 1); 
3820         if (vnode_drain(vp
)) { 
3821                 panic("vnode drain failed"); 
3825         isfifo 
= (vp
->v_type 
== VFIFO
); 
3827         if (vp
->v_type 
!= VBAD
) 
3828                 vgone(vp
, flags
);               /* clean and reclaim the vnode */ 
3831          * give the vnode a new identity so that vnode_getwithvid will fail 
3832          * on any stale cache accesses... 
3833          * grab the list_lock so that if we're in "new_vnode" 
3834          * behind the list_lock trying to steal this vnode, the v_id is stable... 
3835          * once new_vnode drops the list_lock, it will block trying to take 
3836          * the vnode lock until we release it... at that point it will evaluate 
3837          * whether the v_vid has changed 
3838          * also need to make sure that the vnode isn't on a list where "new_vnode" 
3839          * can find it after the v_id has been bumped until we are completely done 
3840          * with the vnode (i.e. putting it back on a list has to be the very last 
3841          * thing we do to this vnode... many of the callers of vnode_reclaim_internal 
3842          * are holding an io_count on the vnode... they need to drop the io_count 
3843          * BEFORE doing a vnode_list_add or make sure to hold the vnode lock until 
3844          * they are completely done with the vnode 
3848         vnode_list_remove_locked(vp
); 
3851         vnode_list_unlock(); 
3854                 struct fifoinfo 
* fip
; 
3856                 fip 
= vp
->v_fifoinfo
; 
3857                 vp
->v_fifoinfo 
= NULL
; 
3864                 panic("vnode_reclaim_internal: cleaned vnode isn't"); 
3865         if (vp
->v_numoutput
) 
3866                 panic("vnode_reclaim_internal: clean vnode has pending I/O's"); 
3867         if (UBCINFOEXISTS(vp
)) 
3868                 panic("vnode_reclaim_internal: ubcinfo not cleaned"); 
3870                 panic("vnode_reclaim_internal: vparent not removed"); 
3872                 panic("vnode_reclaim_internal: vname not removed"); 
3874         vp
->v_socket 
= NULL
; 
3876         vp
->v_lflag 
&= ~VL_TERMINATE
; 
3877         vp
->v_lflag 
&= ~VL_DRAIN
; 
3880         if (vp
->v_lflag 
& VL_TERMWANT
) { 
3881                 vp
->v_lflag 
&= ~VL_TERMWANT
; 
3882                 wakeup(&vp
->v_lflag
); 
3886                  * make sure we get on the 
3887                  * dead list if appropriate 
3896  *  vnode_create(int flavor, size_t size, void * param,  vnode_t  *vp) 
3899 vnode_create(int flavor
, size_t size
, void *data
, vnode_t 
*vpp
) 
3907         struct componentname 
*cnp
; 
3908         struct vnode_fsparam 
*param 
= (struct vnode_fsparam 
*)data
; 
3910         if (flavor 
== VNCREATE_FLAVOR 
&& (size 
== VCREATESIZE
) && param
) { 
3911                 if ( (error 
= new_vnode(&vp
)) ) { 
3914                         dvp 
= param
->vnfs_dvp
; 
3915                         cnp 
= param
->vnfs_cnp
; 
3917                         vp
->v_op 
= param
->vnfs_vops
; 
3918                         vp
->v_type 
= param
->vnfs_vtype
; 
3919                         vp
->v_data 
= param
->vnfs_fsnode
; 
3921                         if (param
->vnfs_markroot
) 
3922                                 vp
->v_flag 
|= VROOT
; 
3923                         if (param
->vnfs_marksystem
) 
3924                                 vp
->v_flag 
|= VSYSTEM
; 
3925                         if (vp
->v_type 
== VREG
) { 
3926                                 error 
= ubc_info_init_withsize(vp
, param
->vnfs_filesize
); 
3932                                         vp
->v_op 
= dead_vnodeop_p
; 
3936                                         vp
->v_lflag 
|= VL_DEAD
; 
3945                         if (vp
->v_type 
== VCHR 
|| vp
->v_type 
== VBLK
) { 
3947                                 vp
->v_tag 
= VT_DEVFS
;           /* callers will reset if needed (bdevvp) */ 
3949                                 if ( (nvp 
= checkalias(vp
, param
->vnfs_rdev
)) ) { 
3951                                          * if checkalias returns a vnode, it will be locked 
3953                                          * first get rid of the unneeded vnode we acquired 
3956                                         vp
->v_op 
= spec_vnodeop_p
; 
3958                                         vp
->v_lflag 
= VL_DEAD
; 
3964                                          * switch to aliased vnode and finish 
3970                                         vp
->v_op 
= param
->vnfs_vops
; 
3971                                         vp
->v_type 
= param
->vnfs_vtype
; 
3972                                         vp
->v_data 
= param
->vnfs_fsnode
; 
3975                                         insmntque(vp
, param
->vnfs_mp
); 
3981                         if (vp
->v_type 
== VFIFO
) { 
3982                                 struct fifoinfo 
*fip
; 
3984                                 MALLOC(fip
, struct fifoinfo 
*, 
3985                                         sizeof(*fip
), M_TEMP
, M_WAITOK
); 
3986                                 bzero(fip
, sizeof(struct fifoinfo 
)); 
3987                                 vp
->v_fifoinfo 
= fip
; 
3989                         /* The file systems usually pass the address of the location where 
3990                          * where there store  the vnode pointer. When we add the vnode in mount 
3991                          * point and name cache they are discoverable. So the file system node 
3992                          * will have the connection to vnode setup by then 
3996                         /* Add fs named reference. */ 
3997                         if (param
->vnfs_flags 
& VNFS_ADDFSREF
) { 
3998                                 vp
->v_lflag 
|= VNAMED_FSHASH
; 
4000                         if (param
->vnfs_mp
) { 
4001                                         if (param
->vnfs_mp
->mnt_kern_flag 
& MNTK_LOCK_LOCAL
) 
4002                                                 vp
->v_flag 
|= VLOCKLOCAL
; 
4005                                          * enter in mount vnode list 
4007                                         insmntque(vp
, param
->vnfs_mp
); 
4009 #ifdef INTERIM_FSNODE_LOCK       
4010                                 if (param
->vnfs_mp
->mnt_vtable
->vfc_threadsafe 
== 0) { 
4011                                         MALLOC_ZONE(vp
->v_unsafefs
, struct unsafe_fsnode 
*, 
4012                                                     sizeof(struct unsafe_fsnode
), M_UNSAFEFS
, M_WAITOK
); 
4013                                         vp
->v_unsafefs
->fsnode_count 
= 0; 
4014                                         vp
->v_unsafefs
->fsnodeowner  
= (void *)NULL
; 
4015                                         lck_mtx_init(&vp
->v_unsafefs
->fsnodelock
, vnode_lck_grp
, vnode_lck_attr
); 
4017 #endif /* INTERIM_FSNODE_LOCK */ 
4019                         if (dvp 
&& vnode_ref(dvp
) == 0) { 
4023                                 if (dvp 
&& ((param
->vnfs_flags 
& (VNFS_NOCACHE 
| VNFS_CANTCACHE
)) == 0)) { 
4025                                          * enter into name cache 
4026                                          * we've got the info to enter it into the name cache now 
4028                                         cache_enter(dvp
, vp
, cnp
); 
4030                                 vp
->v_name 
= vfs_addname(cnp
->cn_nameptr
, cnp
->cn_namelen
, cnp
->cn_hash
, 0); 
4031                                 if ((cnp
->cn_flags 
& UNIONCREATED
) == UNIONCREATED
) 
4032                                         vp
->v_flag 
|= VISUNION
; 
4034                         if ((param
->vnfs_flags 
& VNFS_CANTCACHE
) == 0) { 
4036                                  * this vnode is being created as cacheable in the name cache 
4037                                  * this allows us to re-enter it in the cache 
4039                                 vp
->v_flag 
|= VNCACHEABLE
; 
4041                         ut 
= get_bsdthread_info(current_thread()); 
4043                         if ((current_proc()->p_lflag 
& P_LRAGE_VNODES
) || 
4044                             (ut
->uu_flag 
& UT_RAGE_VNODES
)) { 
4046                                  * process has indicated that it wants any 
4047                                  * vnodes created on its behalf to be rapidly 
4048                                  * aged to reduce the impact on the cached set 
4051                                 vp
->v_flag 
|= VRAGE
; 
4060 vnode_addfsref(vnode_t vp
) 
4062         vnode_lock_spin(vp
); 
4063         if (vp
->v_lflag 
& VNAMED_FSHASH
) 
4064                 panic("add_fsref: vp already has named reference"); 
4065         if ((vp
->v_freelist
.tqe_prev 
!= (struct vnode 
**)0xdeadb)) 
4066                 panic("addfsref: vp on the free list\n"); 
4067         vp
->v_lflag 
|= VNAMED_FSHASH
; 
4073 vnode_removefsref(vnode_t vp
) 
4075         vnode_lock_spin(vp
); 
4076         if ((vp
->v_lflag 
& VNAMED_FSHASH
) == 0) 
4077                 panic("remove_fsref: no named reference"); 
4078         vp
->v_lflag 
&= ~VNAMED_FSHASH
; 
4086 vfs_iterate(__unused 
int flags
, int (*callout
)(mount_t
, void *), void *arg
) 
4091         int count
, actualcount
,  i
; 
4094         count 
= mount_getvfscnt(); 
4097         fsid_list 
= (fsid_t 
*)kalloc(count 
* sizeof(fsid_t
)); 
4098         allocmem 
= (void *)fsid_list
; 
4100         actualcount 
= mount_fillfsids(fsid_list
, count
); 
4102         for (i
=0; i
< actualcount
; i
++) { 
4104                 /* obtain the mount point with iteration reference */ 
4105                 mp 
= mount_list_lookupby_fsid(&fsid_list
[i
], 0, 1); 
4107                 if(mp 
== (struct mount 
*)0) 
4110                 if (mp
->mnt_lflag 
& (MNT_LDEAD 
| MNT_LUNMOUNT
)) { 
4118                 /* iterate over all the vnodes */ 
4119                 ret 
= callout(mp
, arg
); 
4125                 case VFS_RETURNED_DONE
: 
4126                         if (ret 
== VFS_RETURNED_DONE
) { 
4132                 case VFS_CLAIMED_DONE
: 
4143         kfree(allocmem
, (count 
* sizeof(fsid_t
))); 
4148  * Update the vfsstatfs structure in the mountpoint. 
4149  * MAC: Parameter eventtype added, indicating whether the event that 
4150  * triggered this update came from user space, via a system call 
4151  * (VFS_USER_EVENT) or an internal kernel call (VFS_KERNEL_EVENT). 
4154 vfs_update_vfsstat(mount_t mp
, vfs_context_t ctx
, __unused 
int eventtype
) 
4160          * Request the attributes we want to propagate into 
4161          * the per-mount vfsstat structure. 
4164         VFSATTR_WANTED(&va
, f_iosize
); 
4165         VFSATTR_WANTED(&va
, f_blocks
); 
4166         VFSATTR_WANTED(&va
, f_bfree
); 
4167         VFSATTR_WANTED(&va
, f_bavail
); 
4168         VFSATTR_WANTED(&va
, f_bused
); 
4169         VFSATTR_WANTED(&va
, f_files
); 
4170         VFSATTR_WANTED(&va
, f_ffree
); 
4171         VFSATTR_WANTED(&va
, f_bsize
); 
4172         VFSATTR_WANTED(&va
, f_fssubtype
); 
4174         if (eventtype 
== VFS_USER_EVENT
) { 
4175                 error 
= mac_mount_check_getattr(ctx
, mp
, &va
); 
4181         if ((error 
= vfs_getattr(mp
, &va
, ctx
)) != 0) { 
4182                 KAUTH_DEBUG("STAT - filesystem returned error %d", error
); 
4187          * Unpack into the per-mount structure. 
4189          * We only overwrite these fields, which are likely to change: 
4197          * And these which are not, but which the FS has no other way 
4198          * of providing to us: 
4204         if (VFSATTR_IS_SUPPORTED(&va
, f_bsize
)) { 
4205                 /* 4822056 - protect against malformed server mount */ 
4206                 mp
->mnt_vfsstat
.f_bsize 
= (va
.f_bsize 
> 0 ? va
.f_bsize 
: 512); 
4208                 mp
->mnt_vfsstat
.f_bsize 
= mp
->mnt_devblocksize
; /* default from the device block size */ 
4210         if (VFSATTR_IS_SUPPORTED(&va
, f_iosize
)) { 
4211                 mp
->mnt_vfsstat
.f_iosize 
= va
.f_iosize
; 
4213                 mp
->mnt_vfsstat
.f_iosize 
= 1024 * 1024;         /* 1MB sensible I/O size */ 
4215         if (VFSATTR_IS_SUPPORTED(&va
, f_blocks
)) 
4216                 mp
->mnt_vfsstat
.f_blocks 
= va
.f_blocks
; 
4217         if (VFSATTR_IS_SUPPORTED(&va
, f_bfree
)) 
4218                 mp
->mnt_vfsstat
.f_bfree 
= va
.f_bfree
; 
4219         if (VFSATTR_IS_SUPPORTED(&va
, f_bavail
)) 
4220                 mp
->mnt_vfsstat
.f_bavail 
= va
.f_bavail
; 
4221         if (VFSATTR_IS_SUPPORTED(&va
, f_bused
)) 
4222                 mp
->mnt_vfsstat
.f_bused 
= va
.f_bused
; 
4223         if (VFSATTR_IS_SUPPORTED(&va
, f_files
)) 
4224                 mp
->mnt_vfsstat
.f_files 
= va
.f_files
; 
4225         if (VFSATTR_IS_SUPPORTED(&va
, f_ffree
)) 
4226                 mp
->mnt_vfsstat
.f_ffree 
= va
.f_ffree
; 
4228         /* this is unlikely to change, but has to be queried for */ 
4229         if (VFSATTR_IS_SUPPORTED(&va
, f_fssubtype
)) 
4230                 mp
->mnt_vfsstat
.f_fssubtype 
= va
.f_fssubtype
; 
4236 mount_list_add(mount_t mp
) 
4239         TAILQ_INSERT_TAIL(&mountlist
, mp
, mnt_list
);     
4241         mount_list_unlock(); 
4245 mount_list_remove(mount_t mp
) 
4248         TAILQ_REMOVE(&mountlist
, mp
, mnt_list
); 
4250         mp
->mnt_list
.tqe_next 
= NULL
; 
4251         mp
->mnt_list
.tqe_prev 
= NULL
; 
4252         mount_list_unlock(); 
4257 mount_lookupby_volfsid(int volfs_id
, int withref
) 
4259         mount_t cur_mount 
= (mount_t
)0; 
4263         TAILQ_FOREACH(mp
, &mountlist
, mnt_list
) { 
4264                 if (!(mp
->mnt_kern_flag 
& MNTK_UNMOUNT
) && 
4265                     (mp
->mnt_kern_flag 
& MNTK_PATH_FROM_ID
) && 
4266                     (mp
->mnt_vfsstat
.f_fsid
.val
[0] == volfs_id
)) { 
4269                                 if (mount_iterref(cur_mount
, 1))  { 
4270                                         cur_mount 
= (mount_t
)0; 
4271                                         mount_list_unlock(); 
4278         mount_list_unlock(); 
4279         if (withref 
&& (cur_mount 
!= (mount_t
)0)) { 
4281                 if (vfs_busy(mp
, LK_NOWAIT
) != 0) { 
4282                         cur_mount 
= (mount_t
)0; 
4293 mount_list_lookupby_fsid(fsid_t 
*fsid
, int locked
, int withref
) 
4295         mount_t retmp 
= (mount_t
)0; 
4300         TAILQ_FOREACH(mp
, &mountlist
, mnt_list
)  
4301                 if (mp
->mnt_vfsstat
.f_fsid
.val
[0] == fsid
->val
[0] && 
4302                     mp
->mnt_vfsstat
.f_fsid
.val
[1] == fsid
->val
[1]) { 
4305                                 if (mount_iterref(retmp
, 1))  
4312                 mount_list_unlock(); 
4317 vnode_lookup(const char *path
, int flags
, vnode_t 
*vpp
, vfs_context_t ctx
) 
4319         struct nameidata nd
; 
4323         if (ctx 
== NULL
) {              /* XXX technically an error */ 
4324                 ctx 
= vfs_context_current(); 
4327         if (flags 
& VNODE_LOOKUP_NOFOLLOW
) 
4332         if (flags 
& VNODE_LOOKUP_NOCROSSMOUNT
) 
4333                 ndflags 
|= NOCROSSMOUNT
; 
4334         if (flags 
& VNODE_LOOKUP_DOWHITEOUT
) 
4335                 ndflags 
|= DOWHITEOUT
; 
4337         /* XXX AUDITVNPATH1 needed ? */ 
4338         NDINIT(&nd
, LOOKUP
, ndflags
, UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
); 
4340         if ((error 
= namei(&nd
))) 
4349 vnode_open(const char *path
, int fmode
, int cmode
, int flags
, vnode_t 
*vpp
, vfs_context_t ctx
) 
4351         struct nameidata nd
; 
4356         if (ctx 
== NULL
) {              /* XXX technically an error */ 
4357                 ctx 
= vfs_context_current(); 
4360         if (fmode 
& O_NOFOLLOW
) 
4361                 lflags 
|= VNODE_LOOKUP_NOFOLLOW
; 
4363         if (lflags 
& VNODE_LOOKUP_NOFOLLOW
) 
4368         if (lflags 
& VNODE_LOOKUP_NOCROSSMOUNT
) 
4369                 ndflags 
|= NOCROSSMOUNT
; 
4370         if (lflags 
& VNODE_LOOKUP_DOWHITEOUT
) 
4371                 ndflags 
|= DOWHITEOUT
; 
4373         /* XXX AUDITVNPATH1 needed ? */ 
4374         NDINIT(&nd
, LOOKUP
, ndflags
, UIO_SYSSPACE
, CAST_USER_ADDR_T(path
), ctx
); 
4376         if ((error 
= vn_open(&nd
, fmode
, cmode
))) 
4385 vnode_close(vnode_t vp
, int flags
, vfs_context_t ctx
) 
4390                 ctx 
= vfs_context_current(); 
4393         error 
= vn_close(vp
, flags
, ctx
); 
4399  * Returns:     0                       Success 
4403 vnode_size(vnode_t vp
, off_t 
*sizep
, vfs_context_t ctx
) 
4405         struct vnode_attr       va
; 
4409         VATTR_WANTED(&va
, va_data_size
); 
4410         error 
= vnode_getattr(vp
, &va
, ctx
); 
4412                 *sizep 
= va
.va_data_size
; 
4417 vnode_setsize(vnode_t vp
, off_t size
, int ioflag
, vfs_context_t ctx
) 
4419         struct vnode_attr       va
; 
4422         VATTR_SET(&va
, va_data_size
, size
); 
4423         va
.va_vaflags 
= ioflag 
& 0xffff; 
4424         return(vnode_setattr(vp
, &va
, ctx
)); 
4428  * Create a filesystem object of arbitrary type with arbitrary attributes in 
4429  * the spevied directory with the specified name. 
4431  * Parameters:  dvp                     Pointer to the vnode of the directory 
4432  *                                      in which to create the object. 
4433  *              vpp                     Pointer to the area into which to 
4434  *                                      return the vnode of the created object. 
4435  *              cnp                     Component name pointer from the namei 
4436  *                                      data structure, containing the name to 
4437  *                                      use for the create object. 
4438  *              vap                     Pointer to the vnode_attr structure 
4439  *                                      describing the object to be created, 
4440  *                                      including the type of object. 
4441  *              flags                   VN_* flags controlling ACL inheritance 
4442  *                                      and whether or not authorization is to 
4443  *                                      be required for the operation. 
4445  * Returns:     0                       Success 
4448  * Implicit:    *vpp                    Contains the vnode of the object that 
4449  *                                      was created, if successful. 
4450  *              *cnp                    May be modified by the underlying VFS. 
4451  *              *vap                    May be modified by the underlying VFS. 
4452  *                                      modified by either ACL inheritance or 
4455  *                                      be modified, even if the operation is 
4458  * Notes:       The kauth_filesec_t in 'vap', if any, is in host byte order. 
4460  *              Modification of '*cnp' and '*vap' by the underlying VFS is 
4461  *              strongly discouraged. 
4463  * XXX:         This function is a 'vn_*' function; it belongs in vfs_vnops.c 
4465  * XXX:         We should enummerate the possible errno values here, and where 
4466  *              in the code they originated. 
4469 vn_create(vnode_t dvp
, vnode_t 
*vpp
, struct componentname 
*cnp
, struct vnode_attr 
*vap
, int flags
, vfs_context_t ctx
) 
4471         kauth_acl_t oacl
, nacl
; 
4474         vnode_t vp 
= (vnode_t
)0; 
4480         KAUTH_DEBUG("%p    CREATE - '%s'", dvp
, cnp
->cn_nameptr
); 
4483          * Handle ACL inheritance. 
4485         if (!(flags 
& VN_CREATE_NOINHERIT
) && vfs_extendedsecurity(dvp
->v_mount
)) { 
4486                 /* save the original filesec */ 
4487                 if (VATTR_IS_ACTIVE(vap
, va_acl
)) { 
4493                 if ((error 
= kauth_acl_inherit(dvp
, 
4496                          vap
->va_type 
== VDIR
, 
4498                         KAUTH_DEBUG("%p    CREATE - error %d processing inheritance", dvp
, error
); 
4503                  * If the generated ACL is NULL, then we can save ourselves some effort 
4504                  * by clearing the active bit. 
4507                         VATTR_CLEAR_ACTIVE(vap
, va_acl
); 
4509                         VATTR_SET(vap
, va_acl
, nacl
); 
4514          * Check and default new attributes. 
4515          * This will set va_uid, va_gid, va_mode and va_create_time at least, if the caller 
4516          * hasn't supplied them. 
4518         if ((error 
= vnode_authattr_new(dvp
, vap
, flags 
& VN_CREATE_NOAUTH
, ctx
)) != 0) { 
4519                 KAUTH_DEBUG("%p    CREATE - error %d handing/defaulting attributes", dvp
, error
); 
4525          * Create the requested node. 
4527         switch(vap
->va_type
) { 
4529                 error 
= VNOP_CREATE(dvp
, vpp
, cnp
, vap
, ctx
); 
4532                 error 
= VNOP_MKDIR(dvp
, vpp
, cnp
, vap
, ctx
); 
4538                 error 
= VNOP_MKNOD(dvp
, vpp
, cnp
, vap
, ctx
); 
4541                 panic("vnode_create: unknown vtype %d", vap
->va_type
); 
4544                 KAUTH_DEBUG("%p    CREATE - error %d returned by filesystem", dvp
, error
); 
4550         if (!(flags 
& VN_CREATE_NOLABEL
)) { 
4551                 error 
= vnode_label(vnode_mount(vp
), dvp
, vp
, cnp
, 
4552                     VNODE_LABEL_CREATE
|VNODE_LABEL_NEEDREF
, ctx
); 
4559          * If some of the requested attributes weren't handled by the VNOP, 
4560          * use our fallback code. 
4562         if (!VATTR_ALL_SUPPORTED(vap
) && *vpp
) { 
4563                 KAUTH_DEBUG("     CREATE - doing fallback with ACL %p", vap
->va_acl
); 
4564                 error 
= vnode_setattr_fallback(*vpp
, vap
, ctx
); 
4569         if ((error 
!= 0 ) && (vp 
!= (vnode_t
)0)) { 
4576          * If the caller supplied a filesec in vap, it has been replaced 
4577          * now by the post-inheritance copy.  We need to put the original back 
4578          * and free the inherited product. 
4581                 VATTR_SET(vap
, va_acl
, oacl
); 
4583                 VATTR_CLEAR_ACTIVE(vap
, va_acl
); 
4586                 kauth_acl_free(nacl
); 
4591 static kauth_scope_t    vnode_scope
; 
4592 static int      vnode_authorize_callback(kauth_cred_t credential
, void *idata
, kauth_action_t action
, 
4593     uintptr_t arg0
, uintptr_t arg1
, uintptr_t arg2
, uintptr_t arg3
); 
4594 static int      vnode_authorize_callback_int(__unused kauth_cred_t credential
, __unused 
void *idata
, kauth_action_t action
, 
4595     uintptr_t arg0
, uintptr_t arg1
, uintptr_t arg2
, uintptr_t arg3
); 
4597 typedef struct _vnode_authorize_context 
{ 
4599         struct vnode_attr 
*vap
; 
4601         struct vnode_attr 
*dvap
; 
4605 #define _VAC_IS_OWNER           (1<<0) 
4606 #define _VAC_IN_GROUP           (1<<1) 
4607 #define _VAC_IS_DIR_OWNER       (1<<2) 
4608 #define _VAC_IN_DIR_GROUP       (1<<3) 
4612 vnode_authorize_init(void) 
4614         vnode_scope 
= kauth_register_scope(KAUTH_SCOPE_VNODE
, vnode_authorize_callback
, NULL
); 
4618  * Authorize an operation on a vnode. 
4620  * This is KPI, but here because it needs vnode_scope. 
4622  * Returns:     0                       Success 
4623  *      kauth_authorize_action:EPERM    ... 
4624  *      xlate => EACCES                 Permission denied 
4625  *      kauth_authorize_action:0        Success 
4626  *      kauth_authorize_action:         Depends on callback return; this is 
4627  *                                      usually only vnode_authorize_callback(), 
4628  *                                      but may include other listeners, if any 
4636 vnode_authorize(vnode_t vp
, vnode_t dvp
, kauth_action_t action
, vfs_context_t ctx
) 
4641          * We can't authorize against a dead vnode; allow all operations through so that 
4642          * the correct error can be returned. 
4644         if (vp
->v_type 
== VBAD
) 
4648         result 
= kauth_authorize_action(vnode_scope
, vfs_context_ucred(ctx
), action
, 
4649                    (uintptr_t)ctx
, (uintptr_t)vp
, (uintptr_t)dvp
, (uintptr_t)&error
); 
4650         if (result 
== EPERM
)            /* traditional behaviour */ 
4652         /* did the lower layers give a better error return? */ 
4653         if ((result 
!= 0) && (error 
!= 0)) 
4659  * Test for vnode immutability. 
4661  * The 'append' flag is set when the authorization request is constrained 
4662  * to operations which only request the right to append to a file. 
4664  * The 'ignore' flag is set when an operation modifying the immutability flags 
4665  * is being authorized.  We check the system securelevel to determine which 
4666  * immutability flags we can ignore. 
4669 vnode_immutable(struct vnode_attr 
*vap
, int append
, int ignore
) 
4673         /* start with all bits precluding the operation */ 
4674         mask 
= IMMUTABLE 
| APPEND
; 
4676         /* if appending only, remove the append-only bits */ 
4680         /* ignore only set when authorizing flags changes */ 
4682                 if (securelevel 
<= 0) { 
4683                         /* in insecure state, flags do not inhibit changes */ 
4686                         /* in secure state, user flags don't inhibit */ 
4687                         mask 
&= ~(UF_IMMUTABLE 
| UF_APPEND
); 
4690         KAUTH_DEBUG("IMMUTABLE - file flags 0x%x mask 0x%x append = %d ignore = %d", vap
->va_flags
, mask
, append
, ignore
); 
4691         if ((vap
->va_flags 
& mask
) != 0) 
4697 vauth_node_owner(struct vnode_attr 
*vap
, kauth_cred_t cred
) 
4701         /* default assumption is not-owner */ 
4705          * If the filesystem has given us a UID, we treat this as authoritative. 
4707         if (vap 
&& VATTR_IS_SUPPORTED(vap
, va_uid
)) { 
4708                 result 
= (vap
->va_uid 
== kauth_cred_getuid(cred
)) ? 1 : 0; 
4710         /* we could test the owner UUID here if we had a policy for it */ 
4716 vauth_node_group(struct vnode_attr 
*vap
, kauth_cred_t cred
, int *ismember
) 
4724         /* the caller is expected to have asked the filesystem for a group at some point */ 
4725         if (vap 
&& VATTR_IS_SUPPORTED(vap
, va_gid
)) { 
4726                 error 
= kauth_cred_ismember_gid(cred
, vap
->va_gid
, &result
); 
4728         /* we could test the group UUID here if we had a policy for it */ 
4736 vauth_file_owner(vauth_ctx vcp
) 
4740         if (vcp
->flags_valid 
& _VAC_IS_OWNER
) { 
4741                 result 
= (vcp
->flags 
& _VAC_IS_OWNER
) ? 1 : 0; 
4743                 result 
= vauth_node_owner(vcp
->vap
, vcp
->ctx
->vc_ucred
); 
4745                 /* cache our result */ 
4746                 vcp
->flags_valid 
|= _VAC_IS_OWNER
; 
4748                         vcp
->flags 
|= _VAC_IS_OWNER
; 
4750                         vcp
->flags 
&= ~_VAC_IS_OWNER
; 
4757 vauth_file_ingroup(vauth_ctx vcp
, int *ismember
) 
4761         if (vcp
->flags_valid 
& _VAC_IN_GROUP
) { 
4762                 *ismember 
= (vcp
->flags 
& _VAC_IN_GROUP
) ? 1 : 0; 
4765                 error 
= vauth_node_group(vcp
->vap
, vcp
->ctx
->vc_ucred
, ismember
); 
4768                         /* cache our result */ 
4769                         vcp
->flags_valid 
|= _VAC_IN_GROUP
; 
4771                                 vcp
->flags 
|= _VAC_IN_GROUP
; 
4773                                 vcp
->flags 
&= ~_VAC_IN_GROUP
; 
4782 vauth_dir_owner(vauth_ctx vcp
) 
4786         if (vcp
->flags_valid 
& _VAC_IS_DIR_OWNER
) { 
4787                 result 
= (vcp
->flags 
& _VAC_IS_DIR_OWNER
) ? 1 : 0; 
4789                 result 
= vauth_node_owner(vcp
->dvap
, vcp
->ctx
->vc_ucred
); 
4791                 /* cache our result */ 
4792                 vcp
->flags_valid 
|= _VAC_IS_DIR_OWNER
; 
4794                         vcp
->flags 
|= _VAC_IS_DIR_OWNER
; 
4796                         vcp
->flags 
&= ~_VAC_IS_DIR_OWNER
; 
4803 vauth_dir_ingroup(vauth_ctx vcp
, int *ismember
) 
4807         if (vcp
->flags_valid 
& _VAC_IN_DIR_GROUP
) { 
4808                 *ismember 
= (vcp
->flags 
& _VAC_IN_DIR_GROUP
) ? 1 : 0; 
4811                 error 
= vauth_node_group(vcp
->dvap
, vcp
->ctx
->vc_ucred
, ismember
); 
4814                         /* cache our result */ 
4815                         vcp
->flags_valid 
|= _VAC_IN_DIR_GROUP
; 
4817                                 vcp
->flags 
|= _VAC_IN_DIR_GROUP
; 
4819                                 vcp
->flags 
&= ~_VAC_IN_DIR_GROUP
; 
4827  * Test the posix permissions in (vap) to determine whether (credential) 
4828  * may perform (action) 
4831 vnode_authorize_posix(vauth_ctx vcp
, int action
, int on_dir
) 
4833         struct vnode_attr 
*vap
; 
4834         int needed
, error
, owner_ok
, group_ok
, world_ok
, ismember
; 
4835 #ifdef KAUTH_DEBUG_ENABLE 
4836         const char *where 
= "uninitialized"; 
4837 # define _SETWHERE(c)   where = c; 
4839 # define _SETWHERE(c) 
4842         /* checking file or directory? */ 
4852          * We want to do as little work here as possible.  So first we check 
4853          * which sets of permissions grant us the access we need, and avoid checking 
4854          * whether specific permissions grant access when more generic ones would. 
4857         /* owner permissions */ 
4861         if (action 
& VWRITE
) 
4865         owner_ok 
= (needed 
& vap
->va_mode
) == needed
; 
4867         /* group permissions */ 
4871         if (action 
& VWRITE
) 
4875         group_ok 
= (needed 
& vap
->va_mode
) == needed
; 
4877         /* world permissions */ 
4881         if (action 
& VWRITE
) 
4885         world_ok 
= (needed 
& vap
->va_mode
) == needed
; 
4887         /* If granted/denied by all three, we're done */ 
4888         if (owner_ok 
&& group_ok 
&& world_ok
) { 
4892         if (!owner_ok 
&& !group_ok 
&& !world_ok
) { 
4898         /* Check ownership (relatively cheap) */ 
4899         if ((on_dir 
&& vauth_dir_owner(vcp
)) || 
4900             (!on_dir 
&& vauth_file_owner(vcp
))) { 
4907         /* Not owner; if group and world both grant it we're done */ 
4908         if (group_ok 
&& world_ok
) { 
4909                 _SETWHERE("group/world"); 
4912         if (!group_ok 
&& !world_ok
) { 
4913                 _SETWHERE("group/world"); 
4918         /* Check group membership (most expensive) */ 
4921                 error 
= vauth_dir_ingroup(vcp
, &ismember
); 
4923                 error 
= vauth_file_ingroup(vcp
, &ismember
); 
4934         /* Not owner, not in group, use world result */ 
4942         KAUTH_DEBUG("%p    %s - posix %s permissions : need %s%s%s %x have %s%s%s%s%s%s%s%s%s UID = %d file = %d,%d", 
4943             vcp
->vp
, (error 
== 0) ? "ALLOWED" : "DENIED", where
, 
4944             (action 
& VREAD
)  ? "r" : "-", 
4945             (action 
& VWRITE
) ? "w" : "-", 
4946             (action 
& VEXEC
)  ? "x" : "-", 
4948             (vap
->va_mode 
& S_IRUSR
) ? "r" : "-", 
4949             (vap
->va_mode 
& S_IWUSR
) ? "w" : "-", 
4950             (vap
->va_mode 
& S_IXUSR
) ? "x" : "-", 
4951             (vap
->va_mode 
& S_IRGRP
) ? "r" : "-", 
4952             (vap
->va_mode 
& S_IWGRP
) ? "w" : "-", 
4953             (vap
->va_mode 
& S_IXGRP
) ? "x" : "-", 
4954             (vap
->va_mode 
& S_IROTH
) ? "r" : "-", 
4955             (vap
->va_mode 
& S_IWOTH
) ? "w" : "-", 
4956             (vap
->va_mode 
& S_IXOTH
) ? "x" : "-", 
4957             kauth_cred_getuid(vcp
->ctx
->vc_ucred
), 
4958             on_dir 
? vcp
->dvap
->va_uid 
: vcp
->vap
->va_uid
, 
4959             on_dir 
? vcp
->dvap
->va_gid 
: vcp
->vap
->va_gid
); 
4964  * Authorize the deletion of the node vp from the directory dvp. 
4967  * - Neither the node nor the directory are immutable. 
4968  * - The user is not the superuser. 
4970  * Deletion is not permitted if the directory is sticky and the caller is not owner of the 
4971  * node or directory. 
4973  * If either the node grants DELETE, or the directory grants DELETE_CHILD, the node may be 
4974  * deleted.  If neither denies the permission, and the caller has Posix write access to the 
4975  * directory, then the node may be deleted. 
4978 vnode_authorize_delete(vauth_ctx vcp
) 
4980         struct vnode_attr       
*vap 
= vcp
->vap
; 
4981         struct vnode_attr       
*dvap 
= vcp
->dvap
; 
4982         kauth_cred_t            cred 
= vcp
->ctx
->vc_ucred
; 
4983         struct kauth_acl_eval   eval
; 
4984         int                     error
, delete_denied
, delete_child_denied
, ismember
; 
4986         /* check the ACL on the directory */ 
4987         delete_child_denied 
= 0; 
4988         if (VATTR_IS_NOT(dvap
, va_acl
, NULL
)) { 
4989                 eval
.ae_requested 
= KAUTH_VNODE_DELETE_CHILD
; 
4990                 eval
.ae_acl 
= &dvap
->va_acl
->acl_ace
[0]; 
4991                 eval
.ae_count 
= dvap
->va_acl
->acl_entrycount
; 
4992                 eval
.ae_options 
= 0; 
4993                 if (vauth_dir_owner(vcp
)) 
4994                         eval
.ae_options 
|= KAUTH_AEVAL_IS_OWNER
; 
4995                 if ((error 
= vauth_dir_ingroup(vcp
, &ismember
)) != 0) 
4998                         eval
.ae_options 
|= KAUTH_AEVAL_IN_GROUP
; 
4999                 eval
.ae_exp_gall 
= KAUTH_VNODE_GENERIC_ALL_BITS
; 
5000                 eval
.ae_exp_gread 
= KAUTH_VNODE_GENERIC_READ_BITS
; 
5001                 eval
.ae_exp_gwrite 
= KAUTH_VNODE_GENERIC_WRITE_BITS
; 
5002                 eval
.ae_exp_gexec 
= KAUTH_VNODE_GENERIC_EXECUTE_BITS
; 
5004                 error 
= kauth_acl_evaluate(cred
, &eval
); 
5007                         KAUTH_DEBUG("%p    ERROR during ACL processing - %d", vcp
->vp
, error
); 
5010                 if (eval
.ae_result 
== KAUTH_RESULT_DENY
) 
5011                         delete_child_denied 
= 1; 
5012                 if (eval
.ae_result 
== KAUTH_RESULT_ALLOW
) { 
5013                         KAUTH_DEBUG("%p    ALLOWED - granted by directory ACL", vcp
->vp
); 
5018         /* check the ACL on the node */ 
5020         if (VATTR_IS_NOT(vap
, va_acl
, NULL
)) { 
5021                 eval
.ae_requested 
= KAUTH_VNODE_DELETE
; 
5022                 eval
.ae_acl 
= &vap
->va_acl
->acl_ace
[0]; 
5023                 eval
.ae_count 
= vap
->va_acl
->acl_entrycount
; 
5024                 eval
.ae_options 
= 0; 
5025                 if (vauth_file_owner(vcp
)) 
5026                         eval
.ae_options 
|= KAUTH_AEVAL_IS_OWNER
; 
5027                 if ((error 
= vauth_file_ingroup(vcp
, &ismember
)) != 0) 
5030                         eval
.ae_options 
|= KAUTH_AEVAL_IN_GROUP
; 
5031                 eval
.ae_exp_gall 
= KAUTH_VNODE_GENERIC_ALL_BITS
; 
5032                 eval
.ae_exp_gread 
= KAUTH_VNODE_GENERIC_READ_BITS
; 
5033                 eval
.ae_exp_gwrite 
= KAUTH_VNODE_GENERIC_WRITE_BITS
; 
5034                 eval
.ae_exp_gexec 
= KAUTH_VNODE_GENERIC_EXECUTE_BITS
; 
5036                 if ((error 
= kauth_acl_evaluate(cred
, &eval
)) != 0) { 
5037                         KAUTH_DEBUG("%p    ERROR during ACL processing - %d", vcp
->vp
, error
); 
5040                 if (eval
.ae_result 
== KAUTH_RESULT_DENY
) 
5042                 if (eval
.ae_result 
== KAUTH_RESULT_ALLOW
) { 
5043                         KAUTH_DEBUG("%p    ALLOWED - granted by file ACL", vcp
->vp
); 
5048         /* if denied by ACL on directory or node, return denial */ 
5049         if (delete_denied 
|| delete_child_denied
) { 
5050                 KAUTH_DEBUG("%p    ALLOWED - denied by ACL", vcp
->vp
); 
5054         /* enforce sticky bit behaviour */ 
5055         if ((dvap
->va_mode 
& S_ISTXT
) && !vauth_file_owner(vcp
) && !vauth_dir_owner(vcp
)) { 
5056                 KAUTH_DEBUG("%p    DENIED - sticky bit rules (user %d  file %d  dir %d)", 
5057                     vcp
->vp
, cred
->cr_uid
, vap
->va_uid
, dvap
->va_uid
); 
5061         /* check the directory */ 
5062         if ((error 
= vnode_authorize_posix(vcp
, VWRITE
, 1 /* on_dir */)) != 0) { 
5063                 KAUTH_DEBUG("%p    ALLOWED - granted by posix permisssions", vcp
->vp
); 
5067         /* not denied, must be OK */ 
5073  * Authorize an operation based on the node's attributes. 
5076 vnode_authorize_simple(vauth_ctx vcp
, kauth_ace_rights_t acl_rights
, kauth_ace_rights_t preauth_rights
, boolean_t 
*found_deny
) 
5078         struct vnode_attr       
*vap 
= vcp
->vap
; 
5079         kauth_cred_t            cred 
= vcp
->ctx
->vc_ucred
; 
5080         struct kauth_acl_eval   eval
; 
5081         int                     error
, ismember
; 
5082         mode_t                  posix_action
; 
5085          * If we are the file owner, we automatically have some rights. 
5087          * Do we need to expand this to support group ownership? 
5089         if (vauth_file_owner(vcp
)) 
5090                 acl_rights 
&= ~(KAUTH_VNODE_WRITE_SECURITY
); 
5093          * If we are checking both TAKE_OWNERSHIP and WRITE_SECURITY, we can 
5094          * mask the latter.  If TAKE_OWNERSHIP is requested the caller is about to 
5095          * change ownership to themselves, and WRITE_SECURITY is implicitly 
5096          * granted to the owner.  We need to do this because at this point 
5097          * WRITE_SECURITY may not be granted as the caller is not currently 
5100         if ((acl_rights 
& KAUTH_VNODE_TAKE_OWNERSHIP
) && 
5101             (acl_rights 
& KAUTH_VNODE_WRITE_SECURITY
)) 
5102                 acl_rights 
&= ~KAUTH_VNODE_WRITE_SECURITY
; 
5104         if (acl_rights 
== 0) { 
5105                 KAUTH_DEBUG("%p    ALLOWED - implicit or no rights required", vcp
->vp
); 
5109         /* if we have an ACL, evaluate it */ 
5110         if (VATTR_IS_NOT(vap
, va_acl
, NULL
)) { 
5111                 eval
.ae_requested 
= acl_rights
; 
5112                 eval
.ae_acl 
= &vap
->va_acl
->acl_ace
[0]; 
5113                 eval
.ae_count 
= vap
->va_acl
->acl_entrycount
; 
5114                 eval
.ae_options 
= 0; 
5115                 if (vauth_file_owner(vcp
)) 
5116                         eval
.ae_options 
|= KAUTH_AEVAL_IS_OWNER
; 
5117                 if ((error 
= vauth_file_ingroup(vcp
, &ismember
)) != 0) 
5120                         eval
.ae_options 
|= KAUTH_AEVAL_IN_GROUP
; 
5121                 eval
.ae_exp_gall 
= KAUTH_VNODE_GENERIC_ALL_BITS
; 
5122                 eval
.ae_exp_gread 
= KAUTH_VNODE_GENERIC_READ_BITS
; 
5123                 eval
.ae_exp_gwrite 
= KAUTH_VNODE_GENERIC_WRITE_BITS
; 
5124                 eval
.ae_exp_gexec 
= KAUTH_VNODE_GENERIC_EXECUTE_BITS
; 
5126                 if ((error 
= kauth_acl_evaluate(cred
, &eval
)) != 0) { 
5127                         KAUTH_DEBUG("%p    ERROR during ACL processing - %d", vcp
->vp
, error
); 
5131                 if (eval
.ae_result 
== KAUTH_RESULT_DENY
) { 
5132                         KAUTH_DEBUG("%p    DENIED - by ACL", vcp
->vp
); 
5133                         return(EACCES
);                 /* deny, deny, counter-allege */ 
5135                 if (eval
.ae_result 
== KAUTH_RESULT_ALLOW
) { 
5136                         KAUTH_DEBUG("%p    ALLOWED - all rights granted by ACL", vcp
->vp
); 
5139                 *found_deny 
= eval
.ae_found_deny
; 
5141                 /* fall through and evaluate residual rights */ 
5143                 /* no ACL, everything is residual */ 
5144                 eval
.ae_residual 
= acl_rights
; 
5148          * Grant residual rights that have been pre-authorized. 
5150         eval
.ae_residual 
&= ~preauth_rights
; 
5153          * We grant WRITE_ATTRIBUTES to the owner if it hasn't been denied. 
5155         if (vauth_file_owner(vcp
)) 
5156                 eval
.ae_residual 
&= ~KAUTH_VNODE_WRITE_ATTRIBUTES
; 
5158         if (eval
.ae_residual 
== 0) { 
5159                 KAUTH_DEBUG("%p    ALLOWED - rights already authorized", vcp
->vp
); 
5164          * Bail if we have residual rights that can't be granted by posix permissions, 
5165          * or aren't presumed granted at this point. 
5167          * XXX these can be collapsed for performance 
5169         if (eval
.ae_residual 
& KAUTH_VNODE_CHANGE_OWNER
) { 
5170                 KAUTH_DEBUG("%p    DENIED - CHANGE_OWNER not permitted", vcp
->vp
); 
5173         if (eval
.ae_residual 
& KAUTH_VNODE_WRITE_SECURITY
) { 
5174                 KAUTH_DEBUG("%p    DENIED - WRITE_SECURITY not permitted", vcp
->vp
); 
5179         if (eval
.ae_residual 
& KAUTH_VNODE_DELETE
) 
5180                 panic("vnode_authorize: can't be checking delete permission here"); 
5184          * Compute the fallback posix permissions that will satisfy the remaining 
5188         if (eval
.ae_residual 
& (KAUTH_VNODE_READ_DATA 
| 
5189                 KAUTH_VNODE_LIST_DIRECTORY 
| 
5190                 KAUTH_VNODE_READ_EXTATTRIBUTES
)) 
5191                 posix_action 
|= VREAD
; 
5192         if (eval
.ae_residual 
& (KAUTH_VNODE_WRITE_DATA 
| 
5193                 KAUTH_VNODE_ADD_FILE 
| 
5194                 KAUTH_VNODE_ADD_SUBDIRECTORY 
| 
5195                 KAUTH_VNODE_DELETE_CHILD 
| 
5196                 KAUTH_VNODE_WRITE_ATTRIBUTES 
| 
5197                 KAUTH_VNODE_WRITE_EXTATTRIBUTES
)) 
5198                 posix_action 
|= VWRITE
; 
5199         if (eval
.ae_residual 
& (KAUTH_VNODE_EXECUTE 
| 
5200                 KAUTH_VNODE_SEARCH
)) 
5201                 posix_action 
|= VEXEC
; 
5203         if (posix_action 
!= 0) { 
5204                 return(vnode_authorize_posix(vcp
, posix_action
, 0 /* !on_dir */)); 
5206                 KAUTH_DEBUG("%p    ALLOWED - residual rights %s%s%s%s%s%s%s%s%s%s%s%s%s%s granted due to no posix mapping", 
5208                     (eval
.ae_residual 
& KAUTH_VNODE_READ_DATA
) 
5209                     ? vnode_isdir(vcp
->vp
) ? " LIST_DIRECTORY" : " READ_DATA" : "", 
5210                     (eval
.ae_residual 
& KAUTH_VNODE_WRITE_DATA
) 
5211                     ? vnode_isdir(vcp
->vp
) ? " ADD_FILE" : " WRITE_DATA" : "", 
5212                     (eval
.ae_residual 
& KAUTH_VNODE_EXECUTE
) 
5213                     ? vnode_isdir(vcp
->vp
) ? " SEARCH" : " EXECUTE" : "", 
5214                     (eval
.ae_residual 
& KAUTH_VNODE_DELETE
) 
5216                     (eval
.ae_residual 
& KAUTH_VNODE_APPEND_DATA
) 
5217                     ? vnode_isdir(vcp
->vp
) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "", 
5218                     (eval
.ae_residual 
& KAUTH_VNODE_DELETE_CHILD
) 
5219                     ? " DELETE_CHILD" : "", 
5220                     (eval
.ae_residual 
& KAUTH_VNODE_READ_ATTRIBUTES
) 
5221                     ? " READ_ATTRIBUTES" : "", 
5222                     (eval
.ae_residual 
& KAUTH_VNODE_WRITE_ATTRIBUTES
) 
5223                     ? " WRITE_ATTRIBUTES" : "", 
5224                     (eval
.ae_residual 
& KAUTH_VNODE_READ_EXTATTRIBUTES
) 
5225                     ? " READ_EXTATTRIBUTES" : "", 
5226                     (eval
.ae_residual 
& KAUTH_VNODE_WRITE_EXTATTRIBUTES
) 
5227                     ? " WRITE_EXTATTRIBUTES" : "", 
5228                     (eval
.ae_residual 
& KAUTH_VNODE_READ_SECURITY
) 
5229                     ? " READ_SECURITY" : "", 
5230                     (eval
.ae_residual 
& KAUTH_VNODE_WRITE_SECURITY
) 
5231                     ? " WRITE_SECURITY" : "", 
5232                     (eval
.ae_residual 
& KAUTH_VNODE_CHECKIMMUTABLE
) 
5233                     ? " CHECKIMMUTABLE" : "", 
5234                     (eval
.ae_residual 
& KAUTH_VNODE_CHANGE_OWNER
) 
5235                     ? " CHANGE_OWNER" : ""); 
5239          * Lack of required Posix permissions implies no reason to deny access. 
5245  * Check for file immutability. 
5248 vnode_authorize_checkimmutable(vnode_t vp
, struct vnode_attr 
*vap
, int rights
, int ignore
) 
5255          * Perform immutability checks for operations that change data. 
5257          * Sockets, fifos and devices require special handling. 
5259         switch(vp
->v_type
) { 
5265                  * Writing to these nodes does not change the filesystem data, 
5266                  * so forget that it's being tried. 
5268                 rights 
&= ~KAUTH_VNODE_WRITE_DATA
; 
5275         if (rights 
& KAUTH_VNODE_WRITE_RIGHTS
) { 
5277                 /* check per-filesystem options if possible */ 
5281                         /* check for no-EA filesystems */ 
5282                         if ((rights 
& KAUTH_VNODE_WRITE_EXTATTRIBUTES
) && 
5283                             (vfs_flags(mp
) & MNT_NOUSERXATTR
)) { 
5284                                 KAUTH_DEBUG("%p    DENIED - filesystem disallowed extended attributes", vp
); 
5285                                 error 
= EACCES
;  /* User attributes disabled */ 
5290                 /* check for file immutability */ 
5292                 if (vp
->v_type 
== VDIR
) { 
5293                         if ((rights 
& (KAUTH_VNODE_ADD_FILE 
| KAUTH_VNODE_ADD_SUBDIRECTORY
)) == rights
) 
5296                         if ((rights 
& KAUTH_VNODE_APPEND_DATA
) == rights
) 
5299                 if ((error 
= vnode_immutable(vap
, append
, ignore
)) != 0) { 
5300                         KAUTH_DEBUG("%p    DENIED - file is immutable", vp
); 
5309  * Handle authorization actions for filesystems that advertise that the 
5310  * server will be enforcing. 
5312  * Returns:     0                       Authorization should be handled locally 
5313  *              1                       Authorization was handled by the FS 
5315  * Note:        Imputed returns will only occur if the authorization request 
5316  *              was handled by the FS. 
5318  * Imputed:     *resultp, modified      Return code from FS when the request is 
5319  *                                      handled by the FS. 
5324 vnode_authorize_opaque(vnode_t vp
, int *resultp
, kauth_action_t action
, vfs_context_t ctx
) 
5329          * If the vp is a device node, socket or FIFO it actually represents a local 
5330          * endpoint, so we need to handle it locally. 
5332         switch(vp
->v_type
) { 
5343          * In the advisory request case, if the filesystem doesn't think it's reliable 
5344          * we will attempt to formulate a result ourselves based on VNOP_GETATTR data. 
5346         if ((action 
& KAUTH_VNODE_ACCESS
) && !vfs_authopaqueaccess(vp
->v_mount
)) 
5350          * Let the filesystem have a say in the matter.  It's OK for it to not implemnent 
5351          * VNOP_ACCESS, as most will authorise inline with the actual request. 
5353         if ((error 
= VNOP_ACCESS(vp
, action
, ctx
)) != ENOTSUP
) { 
5355                 KAUTH_DEBUG("%p    DENIED - opaque filesystem VNOP_ACCESS denied access", vp
); 
5360          * Typically opaque filesystems do authorisation in-line, but exec is a special case.  In 
5361          * order to be reasonably sure that exec will be permitted, we try a bit harder here. 
5363         if ((action 
& KAUTH_VNODE_EXECUTE
) && (vp
->v_type 
== VREG
)) { 
5364                 /* try a VNOP_OPEN for readonly access */ 
5365                 if ((error 
= VNOP_OPEN(vp
, FREAD
, ctx
)) != 0) { 
5367                         KAUTH_DEBUG("%p    DENIED - EXECUTE denied because file could not be opened readonly", vp
); 
5370                 VNOP_CLOSE(vp
, FREAD
, ctx
); 
5374          * We don't have any reason to believe that the request has to be denied at this point, 
5375          * so go ahead and allow it. 
5378         KAUTH_DEBUG("%p    ALLOWED - bypassing access check for non-local filesystem", vp
); 
5386  * Returns:     KAUTH_RESULT_ALLOW 
5389  * Imputed:     *arg3, modified         Error code in the deny case 
5390  *              EROFS                   Read-only file system 
5391  *              EACCES                  Permission denied 
5392  *              EPERM                   Operation not permitted [no execute] 
5393  *      vnode_getattr:ENOMEM            Not enough space [only if has filesec] 
5395  *      vnode_authorize_opaque:*arg2    ??? 
5396  *      vnode_authorize_checkimmutable:??? 
5397  *      vnode_authorize_delete:??? 
5398  *      vnode_authorize_simple:??? 
5403 vnode_authorize_callback(kauth_cred_t cred
, void *idata
, kauth_action_t action
, 
5404                          uintptr_t arg0
, uintptr_t arg1
, uintptr_t arg2
, uintptr_t arg3
) 
5407         vnode_t         cvp 
= NULLVP
; 
5411         ctx 
= (vfs_context_t
)arg0
; 
5413         dvp 
= (vnode_t
)arg2
; 
5416          * if there are 2 vnodes passed in, we don't know at 
5417          * this point which rights to look at based on the  
5418          * combined action being passed in... defer until later... 
5419          * otherwise check the kauth 'rights' cache hung 
5420          * off of the vnode we're interested in... if we've already 
5421          * been granted the right we're currently interested in, 
5422          * we can just return success... otherwise we'll go through 
5423          * the process of authorizing the requested right(s)... if that 
5424          * succeeds, we'll add the right(s) to the cache. 
5425          * VNOP_SETATTR and VNOP_SETXATTR will invalidate this cache 
5434         if (vnode_cache_is_authorized(cvp
, ctx
, action
) == TRUE
) 
5435                 return KAUTH_RESULT_ALLOW
; 
5437         result 
= vnode_authorize_callback_int(cred
, idata
, action
, arg0
, arg1
, arg2
, arg3
); 
5439         if (result 
== KAUTH_RESULT_ALLOW 
&& cvp 
!= NULLVP
) 
5440                 vnode_cache_authorized_action(cvp
, ctx
, action
); 
/*
 * vnode_authorize_callback_int
 *
 * Performs the actual vnode authorization work for the kauth callback:
 * unpacks the callback arguments into a _vnode_authorize_context, applies
 * filesystem-level vetoes (read-only, noexec, opaque-auth filesystems),
 * fetches the posix attributes and ACLs for the vnode (and its parent
 * directory for delete checks), then authorizes the requested rights and
 * returns KAUTH_RESULT_ALLOW or KAUTH_RESULT_DENY.
 *
 * NOTE(review): this extract elides many original lines (local
 * declarations, VATTR_INIT calls, 'goto out' error paths, the 'out:'
 * label, and various braces/else arms) — comments describe only the
 * visible code; control-flow claims for elided spans are hedged.
 */
5447 vnode_authorize_callback_int(__unused kauth_cred_t unused_cred
, __unused 
void *idata
, kauth_action_t action
, 
5448     uintptr_t arg0
, uintptr_t arg1
, uintptr_t arg2
, uintptr_t arg3
) 
5450         struct _vnode_authorize_context auth_context
; 
5455         kauth_ace_rights_t      rights
; 
5456         struct vnode_attr       va
, dva
; 
/* TRUE once dvp is known to already be authorized for child deletion (cache hit) */
5460         boolean_t               parent_authorized_for_delete 
= FALSE
; 
/* set by vnode_authorize_simple() if an ACL DENY entry matched */
5461         boolean_t               found_deny 
= FALSE
; 
5462         boolean_t               parent_ref
= FALSE
; 
/* unpack the kauth callback arguments into the authorization context */
5464         vcp 
= &auth_context
; 
5465         ctx 
= vcp
->ctx 
= (vfs_context_t
)arg0
; 
5466         vp 
= vcp
->vp 
= (vnode_t
)arg1
; 
5467         dvp 
= vcp
->dvp 
= (vnode_t
)arg2
; 
/* arg3 is where the caller wants the detailed errno reported */
5468         errorp 
= (int *)arg3
; 
5470          * Note that we authorize against the context, not the passed cred 
5471          * (the same thing anyway) 
5473         cred 
= ctx
->vc_ucred
; 
/* no per-request owner/group flag bits computed yet in the context */
5480         vcp
->flags 
= vcp
->flags_valid 
= 0; 
/* these must all be supplied by the kauth framework; a miss is a kernel bug */
5483         if ((ctx 
== NULL
) || (vp 
== NULL
) || (cred 
== NULL
)) 
5484                 panic("vnode_authorize: bad arguments (context %p  vp %p  cred %p)", ctx
, vp
, cred
); 
/* trace the decoded action bits (KAUTH_DEBUG is a debug-only macro) */
5487         KAUTH_DEBUG("%p  AUTH - %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s on %s '%s' (0x%x:%p/%p)", 
5488             vp
, vfs_context_proc(ctx
)->p_comm
, 
5489             (action 
& KAUTH_VNODE_ACCESS
)               ? "access" : "auth", 
5490             (action 
& KAUTH_VNODE_READ_DATA
)            ? vnode_isdir(vp
) ? " LIST_DIRECTORY" : " READ_DATA" : "", 
5491             (action 
& KAUTH_VNODE_WRITE_DATA
)           ? vnode_isdir(vp
) ? " ADD_FILE" : " WRITE_DATA" : "", 
5492             (action 
& KAUTH_VNODE_EXECUTE
)              ? vnode_isdir(vp
) ? " SEARCH" : " EXECUTE" : "", 
5493             (action 
& KAUTH_VNODE_DELETE
)               ? " DELETE" : "", 
5494             (action 
& KAUTH_VNODE_APPEND_DATA
)          ? vnode_isdir(vp
) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "", 
5495             (action 
& KAUTH_VNODE_DELETE_CHILD
)         ? " DELETE_CHILD" : "", 
5496             (action 
& KAUTH_VNODE_READ_ATTRIBUTES
)      ? " READ_ATTRIBUTES" : "", 
5497             (action 
& KAUTH_VNODE_WRITE_ATTRIBUTES
)     ? " WRITE_ATTRIBUTES" : "", 
5498             (action 
& KAUTH_VNODE_READ_EXTATTRIBUTES
)   ? " READ_EXTATTRIBUTES" : "", 
5499             (action 
& KAUTH_VNODE_WRITE_EXTATTRIBUTES
)  ? " WRITE_EXTATTRIBUTES" : "", 
5500             (action 
& KAUTH_VNODE_READ_SECURITY
)        ? " READ_SECURITY" : "", 
5501             (action 
& KAUTH_VNODE_WRITE_SECURITY
)       ? " WRITE_SECURITY" : "", 
5502             (action 
& KAUTH_VNODE_CHANGE_OWNER
)         ? " CHANGE_OWNER" : "", 
5503             (action 
& KAUTH_VNODE_NOIMMUTABLE
)          ? " (noimmutable)" : "", 
5504             vnode_isdir(vp
) ? "directory" : "file", 
5505             vp
->v_name 
? vp
->v_name 
: "<NULL>", action
, vp
, dvp
); 
5508          * Extract the control bits from the action, everything else is 
/* noimmutable is a request modifier, not a right; strip it and ACCESS from 'rights' */
5511         noimmutable 
= (action 
& KAUTH_VNODE_NOIMMUTABLE
) ? 1 : 0; 
5512         rights 
= action 
& ~(KAUTH_VNODE_ACCESS 
| KAUTH_VNODE_NOIMMUTABLE
); 
/* DELETE checks require a parent directory vnode (see the panic message below) */
5514         if (rights 
& KAUTH_VNODE_DELETE
) { 
5517                         panic("vnode_authorize: KAUTH_VNODE_DELETE test requires a directory"); 
5520                  * check to see if we've already authorized the parent 
5521                  * directory for deletion of its children... if so, we 
5522                  * can skip a whole bunch of work... we will still have to 
5523                  * authorize that this specific child can be removed 
5525                 if (vnode_cache_is_authorized(dvp
, ctx
, KAUTH_VNODE_DELETE
) == TRUE
) 
5526                         parent_authorized_for_delete 
= TRUE
; 
5532          * Check for read-only filesystems. 
5534         if ((rights 
& KAUTH_VNODE_WRITE_RIGHTS
) && 
5535             (vp
->v_mount
->mnt_flag 
& MNT_RDONLY
) && 
5536             ((vp
->v_type 
== VREG
) || (vp
->v_type 
== VDIR
) ||  
5537              (vp
->v_type 
== VLNK
) || (vp
->v_type 
== VCPLX
) ||  
5538              (rights 
& KAUTH_VNODE_DELETE
) || (rights 
& KAUTH_VNODE_DELETE_CHILD
))) { 
5544          * Check for noexec filesystems. 
5546         if ((rights 
& KAUTH_VNODE_EXECUTE
) && (vp
->v_type 
== VREG
) && (vp
->v_mount
->mnt_flag 
& MNT_NOEXEC
)) { 
5552          * Handle cases related to filesystems with non-local enforcement. 
5553          * This call can return 0, in which case we will fall through to perform a 
5554          * check based on VNOP_GETATTR data.  Otherwise it returns 1 and sets 
5555          * an appropriate result, at which point we can return immediately. 
5557         if ((vp
->v_mount
->mnt_kern_flag 
& MNTK_AUTH_OPAQUE
) && vnode_authorize_opaque(vp
, &result
, action
, ctx
)) 
5561          * Get vnode attributes and extended security information for the vnode 
5562          * and directory if required. 
/* request posix mode/owner/group, BSD flags and the ACL for the target vnode */
5564         VATTR_WANTED(&va
, va_mode
); 
5565         VATTR_WANTED(&va
, va_uid
); 
5566         VATTR_WANTED(&va
, va_gid
); 
5567         VATTR_WANTED(&va
, va_flags
); 
5568         VATTR_WANTED(&va
, va_acl
); 
5569         if ((result 
= vnode_getattr(vp
, &va
, ctx
)) != 0) { 
5570                 KAUTH_DEBUG("%p    ERROR - failed to get vnode attributes - %d", vp
, result
); 
/* for a delete check we also need the parent's attributes, unless the cache already authorized it */
5573         if (dvp 
&& parent_authorized_for_delete 
== FALSE
) { 
5574                 VATTR_WANTED(&dva
, va_mode
); 
5575                 VATTR_WANTED(&dva
, va_uid
); 
5576                 VATTR_WANTED(&dva
, va_gid
); 
5577                 VATTR_WANTED(&dva
, va_flags
); 
5578                 VATTR_WANTED(&dva
, va_acl
); 
5579                 if ((result 
= vnode_getattr(dvp
, &dva
, ctx
)) != 0) { 
5580                         KAUTH_DEBUG("%p    ERROR - failed to get directory vnode attributes - %d", vp
, result
); 
5586          * If the vnode is an extended attribute data vnode (eg. a resource fork), *_DATA becomes 
5589         if (S_ISXATTR(va
.va_mode
) || vnode_isnamedstream(vp
)) { 
5590                 if (rights 
& KAUTH_VNODE_READ_DATA
) { 
5591                         rights 
&= ~KAUTH_VNODE_READ_DATA
; 
5592                         rights 
|= KAUTH_VNODE_READ_EXTATTRIBUTES
; 
5594                 if (rights 
& KAUTH_VNODE_WRITE_DATA
) { 
5595                         rights 
&= ~KAUTH_VNODE_WRITE_DATA
; 
5596                         rights 
|= KAUTH_VNODE_WRITE_EXTATTRIBUTES
; 
5601          * Point 'vp' to the resource fork's parent for ACL checking 
5603         if (vnode_isnamedstream(vp
) && 
5604             (vp
->v_parent 
!= NULL
) && 
5605             (vget_internal(vp
->v_parent
, 0, VNODE_NODEAD
) == 0)) { 
/* retarget the check at the parent; vget_internal succeeded so we hold a ref */
5607                 vcp
->vp 
= vp 
= vp
->v_parent
; 
/* the stream's own ACL was fetched above; free it before refetching into va */
5608                 if (VATTR_IS_SUPPORTED(&va
, va_acl
) && (va
.va_acl 
!= NULL
)) 
5609                         kauth_acl_free(va
.va_acl
); 
/* re-fetch attributes, now for the parent of the named stream */
5611                 VATTR_WANTED(&va
, va_mode
); 
5612                 VATTR_WANTED(&va
, va_uid
); 
5613                 VATTR_WANTED(&va
, va_gid
); 
5614                 VATTR_WANTED(&va
, va_flags
); 
5615                 VATTR_WANTED(&va
, va_acl
); 
5616                 if ((result 
= vnode_getattr(vp
, &va
, ctx
)) != 0) 
5621          * Check for immutability. 
5623          * In the deletion case, parent directory immutability vetoes specific 
5626         if ((result 
= vnode_authorize_checkimmutable(vp
, &va
, rights
, noimmutable
)) != 0) 
/* for delete, the parent directory's immutability is checked as DELETE_CHILD */
5628         if ((rights 
& KAUTH_VNODE_DELETE
) && 
5629             parent_authorized_for_delete 
== FALSE 
&& 
5630             ((result 
= vnode_authorize_checkimmutable(dvp
, &dva
, KAUTH_VNODE_DELETE_CHILD
, 0)) != 0)) 
5634          * Clear rights that have been authorized by reaching this point, bail if nothing left to 
5637         rights 
&= ~(KAUTH_VNODE_LINKTARGET 
| KAUTH_VNODE_CHECKIMMUTABLE
); 
5642          * If we're not the superuser, authorize based on file properties. 
5644         if (!vfs_context_issuser(ctx
)) { 
5645                 /* process delete rights */ 
5646                 if ((rights 
& KAUTH_VNODE_DELETE
) && 
5647                     parent_authorized_for_delete 
== FALSE 
&& 
5648                     ((result 
= vnode_authorize_delete(vcp
)) != 0)) 
5651                 /* process remaining rights */ 
5652                 if ((rights 
& ~KAUTH_VNODE_DELETE
) && 
5653                     (result 
= vnode_authorize_simple(vcp
, rights
, rights 
& KAUTH_VNODE_DELETE
, &found_deny
)) != 0) 
5658                  * Execute is only granted to root if one of the x bits is set.  This check only 
5659                  * makes sense if the posix mode bits are actually supported. 
5661                 if ((rights 
& KAUTH_VNODE_EXECUTE
) && 
5662                     (vp
->v_type 
== VREG
) && 
5663                     VATTR_IS_SUPPORTED(&va
, va_mode
) && 
5664                     !(va
.va_mode 
& (S_IXUSR 
| S_IXGRP 
| S_IXOTH
))) { 
5666                         KAUTH_DEBUG("%p    DENIED - root execute requires at least one x bit in 0x%x", vp
, va
.va_mode
); 
5670                 KAUTH_DEBUG("%p    ALLOWED - caller is superuser", vp
); 
/* release any ACLs that vnode_getattr() allocated for us */
5673         if (VATTR_IS_SUPPORTED(&va
, va_acl
) && (va
.va_acl 
!= NULL
)) 
5674                 kauth_acl_free(va
.va_acl
); 
5675         if (VATTR_IS_SUPPORTED(&dva
, va_acl
) && (dva
.va_acl 
!= NULL
)) 
5676                 kauth_acl_free(dva
.va_acl
); 
/* NOTE(review): the guarding condition for this deny path is elided in this extract */
5682                 KAUTH_DEBUG("%p    DENIED - auth denied", vp
); 
5683                 return(KAUTH_RESULT_DENY
); 
5685         if ((rights 
& KAUTH_VNODE_SEARCH
) && found_deny 
== FALSE 
&& vp
->v_type 
== VDIR
) { 
5687                  * if we were successfully granted the right to search this directory 
5688                  * and there were NO ACL DENYs for search and the posix permissions also don't 
5689                  * deny execute, we can synthesize a global right that allows anyone to  
5690                  * traverse this directory during a pathname lookup without having to 
5691                  * match the credential associated with this cache of rights. 
5693                 if (!VATTR_IS_SUPPORTED(&va
, va_mode
) || 
5694                     ((va
.va_mode 
& (S_IXUSR 
| S_IXGRP 
| S_IXOTH
)) == 
5695                      (S_IXUSR 
| S_IXGRP 
| S_IXOTH
))) { 
5696                         vnode_cache_authorized_action(vp
, ctx
, KAUTH_VNODE_SEARCHBYANYONE
); 
5699         if ((rights 
& KAUTH_VNODE_DELETE
) && parent_authorized_for_delete 
== FALSE
) { 
5701                  * parent was successfully and newly authorized for deletions 
5702                  * add it to the cache 
5704                 vnode_cache_authorized_action(dvp
, ctx
, KAUTH_VNODE_DELETE
); 
5709          * Note that this implies that we will allow requests for no rights, as well as 
5710          * for rights that we do not recognise.  There should be none of these. 
5712         KAUTH_DEBUG("%p    ALLOWED - auth granted", vp
); 
5713         return(KAUTH_RESULT_ALLOW
); 
5717  * Check that the attribute information in vattr can be legally applied to 
5718  * a new file by the context. 
/*
 * vnode_authattr_new
 *
 * Validates (and defaults) the attributes in 'vap' for a new filesystem
 * object being created under directory 'dvp' in context 'ctx'.  Supplies
 * default owner, group, flags, mode and creation time where the caller
 * left them inactive, then — for non-superuser callers — checks that the
 * requested owner/group/mode/UUID values are ones the caller is entitled
 * to set.
 *
 * NOTE(review): this extract elides a number of original lines (local
 * declarations such as 'dmp', 'cred', 'error', 'changer', several else
 * arms, braces and the error-return epilogue) — comments describe only
 * the visible code.
 */
5721 vnode_authattr_new(vnode_t dvp
, struct vnode_attr 
*vap
, int noauth
, vfs_context_t ctx
) 
5724         int             has_priv_suser
, ismember
, defaulted_owner
, defaulted_group
, defaulted_mode
; 
/* nothing has been defaulted on behalf of the caller yet */
5730         defaulted_owner 
= defaulted_group 
= defaulted_mode 
= 0; 
5733          * Require that the filesystem support extended security to apply any. 
5735         if (!vfs_extendedsecurity(dvp
->v_mount
) && 
5736             (VATTR_IS_ACTIVE(vap
, va_acl
) || VATTR_IS_ACTIVE(vap
, va_uuuid
) || VATTR_IS_ACTIVE(vap
, va_guuid
))) { 
5742          * Default some fields. 
5747          * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit owner is set, that 
5748          * owner takes ownership of all new files. 
5750         if ((dmp
->mnt_flag 
& MNT_IGNORE_OWNERSHIP
) && (dmp
->mnt_fsowner 
!= KAUTH_UID_NONE
)) { 
5751                 VATTR_SET(vap
, va_uid
, dmp
->mnt_fsowner
); 
5752                 defaulted_owner 
= 1; 
5754                 if (!VATTR_IS_ACTIVE(vap
, va_uid
)) { 
5755                         /* default owner is current user */ 
5756                         VATTR_SET(vap
, va_uid
, kauth_cred_getuid(vfs_context_ucred(ctx
))); 
5757                         defaulted_owner 
= 1; 
5762          * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit group is set, that 
5763          * group takes ownership of all new files. 
5765         if ((dmp
->mnt_flag 
& MNT_IGNORE_OWNERSHIP
) && (dmp
->mnt_fsgroup 
!= KAUTH_GID_NONE
)) { 
5766                 VATTR_SET(vap
, va_gid
, dmp
->mnt_fsgroup
); 
5767                 defaulted_group 
= 1; 
5769                 if (!VATTR_IS_ACTIVE(vap
, va_gid
)) { 
5770                         /* default group comes from parent object, fallback to current user */ 
5771                         struct vnode_attr dva
; 
5773                         VATTR_WANTED(&dva
, va_gid
); 
5774                         if ((error 
= vnode_getattr(dvp
, &dva
, ctx
)) != 0) 
5776                         if (VATTR_IS_SUPPORTED(&dva
, va_gid
)) { 
5777                                 VATTR_SET(vap
, va_gid
, dva
.va_gid
); 
5779                                 VATTR_SET(vap
, va_gid
, kauth_cred_getgid(vfs_context_ucred(ctx
))); 
5781                         defaulted_group 
= 1; 
/* default BSD flags to none */
5785         if (!VATTR_IS_ACTIVE(vap
, va_flags
)) 
5786                 VATTR_SET(vap
, va_flags
, 0); 
5788         /* default mode is everything, masked with current umask */ 
5789         if (!VATTR_IS_ACTIVE(vap
, va_mode
)) { 
5790                 VATTR_SET(vap
, va_mode
, ACCESSPERMS 
& ~vfs_context_proc(ctx
)->p_fd
->fd_cmask
); 
5791                 KAUTH_DEBUG("ATTR - defaulting new file mode to %o from umask %o", vap
->va_mode
, vfs_context_proc(ctx
)->p_fd
->fd_cmask
); 
5794         /* set timestamps to now */ 
5795         if (!VATTR_IS_ACTIVE(vap
, va_create_time
)) { 
5796                 nanotime(&vap
->va_create_time
); 
5797                 VATTR_SET_ACTIVE(vap
, va_create_time
); 
5801          * Check for attempts to set nonsensical fields. 
/* only attributes in the NEWOBJ mask make sense at creation time */
5803         if (vap
->va_active 
& ~VNODE_ATTR_NEWOBJ
) { 
5805                 KAUTH_DEBUG("ATTR - ERROR - attempt to set unsupported new-file attributes %llx", 
5806                     vap
->va_active 
& ~VNODE_ATTR_NEWOBJ
); 
5811          * Quickly check for the applicability of any enforcement here. 
5812          * Tests below maintain the integrity of the local security model. 
5814         if (vfs_authopaque(dvp
->v_mount
)) 
5818          * We need to know if the caller is the superuser, or if the work is 
5819          * otherwise already authorised. 
5821         cred 
= vfs_context_ucred(ctx
); 
5823                 /* doing work for the kernel */ 
5826                 has_priv_suser 
= vfs_context_issuser(ctx
); 
/* validate any requested flags against the settable masks for the caller's privilege */
5830         if (VATTR_IS_ACTIVE(vap
, va_flags
)) { 
5831                 if (has_priv_suser
) { 
5832                         if ((vap
->va_flags 
& (UF_SETTABLE 
| SF_SETTABLE
)) != vap
->va_flags
) { 
5834                                 KAUTH_DEBUG("  DENIED - superuser attempt to set illegal flag(s)"); 
/* non-superuser may only set user-settable (UF_*) flags */
5838                         if ((vap
->va_flags 
& UF_SETTABLE
) != vap
->va_flags
) { 
5840                                 KAUTH_DEBUG("  DENIED - user attempt to set illegal flag(s)"); 
5846         /* if not superuser, validate legality of new-item attributes */ 
5847         if (!has_priv_suser
) { 
5848                 if (!defaulted_mode 
&& VATTR_IS_ACTIVE(vap
, va_mode
)) { 
/* setgid only if the caller is a member of the target group */
5850                         if (vap
->va_mode 
& S_ISGID
) { 
5851                                 if ((error 
= kauth_cred_ismember_gid(cred
, vap
->va_gid
, &ismember
)) != 0) { 
5852                                         KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error
, vap
->va_gid
); 
5856                                         KAUTH_DEBUG("  DENIED - can't set SGID bit, not a member of %d", vap
->va_gid
); 
/* setuid only if the new file will be owned by the caller */
5863                         if ((vap
->va_mode 
& S_ISUID
) && (vap
->va_uid 
!= kauth_cred_getuid(cred
))) { 
5864                                 KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit"); 
/* a caller-supplied owner must be the caller themselves */
5869                 if (!defaulted_owner 
&& (vap
->va_uid 
!= kauth_cred_getuid(cred
))) { 
5870                         KAUTH_DEBUG("  DENIED - cannot create new item owned by %d", vap
->va_uid
); 
/* a caller-supplied group must be one the caller belongs to */
5874                 if (!defaulted_group
) { 
5875                         if ((error 
= kauth_cred_ismember_gid(cred
, vap
->va_gid
, &ismember
)) != 0) { 
5876                                 KAUTH_DEBUG("  ERROR - got %d checking for membership in %d", error
, vap
->va_gid
); 
5880                                 KAUTH_DEBUG("  DENIED - cannot create new item with group %d - not a member", vap
->va_gid
); 
5886                 /* initialising owner/group UUID */ 
5887                 if (VATTR_IS_ACTIVE(vap
, va_uuuid
)) { 
/* owner UUID must equal the caller's own GUID */
5888                         if ((error 
= kauth_cred_getguid(cred
, &changer
)) != 0) { 
5889                                 KAUTH_DEBUG("  ERROR - got %d trying to get caller UUID", error
); 
5890                                 /* XXX ENOENT here - no GUID - should perhaps become EPERM */ 
5893                         if (!kauth_guid_equal(&vap
->va_uuuid
, &changer
)) { 
5894                                 KAUTH_DEBUG("  ERROR - cannot create item with supplied owner UUID - not us"); 
/* group UUID must be one of the caller's groups */
5899                 if (VATTR_IS_ACTIVE(vap
, va_guuid
)) { 
5900                         if ((error 
= kauth_cred_ismember_guid(cred
, &vap
->va_guuid
, &ismember
)) != 0) { 
5901                                 KAUTH_DEBUG("  ERROR - got %d trying to check group membership", error
); 
5905                                 KAUTH_DEBUG("  ERROR - cannot create item with supplied group UUID - not a member"); 
5916  * Check that the attribute information in vap can be legally written by the 
5919  * Call this when you're not sure about the vnode_attr; either its contents 
5920  * have come from an unknown source, or when they are variable. 
5922  * Returns errno, or zero and sets *actionp to the KAUTH_VNODE_* actions that 
5923  * must be authorized to be permitted to write the vattr. 
5926 vnode_authattr(vnode_t vp
, struct vnode_attr 
*vap
, kauth_action_t 
*actionp
, vfs_context_t ctx
) 
5928         struct vnode_attr ova
; 
5929         kauth_action_t  required_action
; 
5930         int             error
, has_priv_suser
, ismember
, chowner
, chgroup
, clear_suid
, clear_sgid
; 
5939         required_action 
= 0; 
5943          * Quickly check for enforcement applicability. 
5945         if (vfs_authopaque(vp
->v_mount
)) 
5949          * Check for attempts to set nonsensical fields. 
5951         if (vap
->va_active 
& VNODE_ATTR_RDONLY
) { 
5952                 KAUTH_DEBUG("ATTR - ERROR: attempt to set readonly attribute(s)"); 
5958          * We need to know if the caller is the superuser. 
5960         cred 
= vfs_context_ucred(ctx
); 
5961         has_priv_suser 
= kauth_cred_issuser(cred
); 
5964          * If any of the following are changing, we need information from the old file: 
5971         if (VATTR_IS_ACTIVE(vap
, va_uid
) || 
5972             VATTR_IS_ACTIVE(vap
, va_gid
) || 
5973             VATTR_IS_ACTIVE(vap
, va_mode
) || 
5974             VATTR_IS_ACTIVE(vap
, va_uuuid
) || 
5975             VATTR_IS_ACTIVE(vap
, va_guuid
)) { 
5976                 VATTR_WANTED(&ova
, va_mode
); 
5977                 VATTR_WANTED(&ova
, va_uid
); 
5978                 VATTR_WANTED(&ova
, va_gid
); 
5979                 VATTR_WANTED(&ova
, va_uuuid
); 
5980                 VATTR_WANTED(&ova
, va_guuid
); 
5981                 KAUTH_DEBUG("ATTR - security information changing, fetching existing attributes"); 
5985          * If timestamps are being changed, we need to know who the file is owned 
5988         if (VATTR_IS_ACTIVE(vap
, va_create_time
) || 
5989             VATTR_IS_ACTIVE(vap
, va_change_time
) || 
5990             VATTR_IS_ACTIVE(vap
, va_modify_time
) || 
5991             VATTR_IS_ACTIVE(vap
, va_access_time
) || 
5992             VATTR_IS_ACTIVE(vap
, va_backup_time
)) { 
5994                 VATTR_WANTED(&ova
, va_uid
); 
5995 #if 0   /* enable this when we support UUIDs as official owners */ 
5996                 VATTR_WANTED(&ova
, va_uuuid
); 
5998                 KAUTH_DEBUG("ATTR - timestamps changing, fetching uid and GUID"); 
6002          * If flags are being changed, we need the old flags. 
6004         if (VATTR_IS_ACTIVE(vap
, va_flags
)) { 
6005                 KAUTH_DEBUG("ATTR - flags changing, fetching old flags"); 
6006                 VATTR_WANTED(&ova
, va_flags
); 
6010          * If the size is being set, make sure it's not a directory. 
6012         if (VATTR_IS_ACTIVE(vap
, va_data_size
)) { 
6013                 /* size is meaningless on a directory, don't permit this */ 
6014                 if (vnode_isdir(vp
)) { 
6015                         KAUTH_DEBUG("ATTR - ERROR: size change requested on a directory"); 
6024         KAUTH_DEBUG("ATTR - fetching old attributes %016llx", ova
.va_active
); 
6025         if ((error 
= vnode_getattr(vp
, &ova
, ctx
)) != 0) { 
6026                 KAUTH_DEBUG("  ERROR - got %d trying to get attributes", error
); 
6031          * Size changes require write access to the file data. 
6033         if (VATTR_IS_ACTIVE(vap
, va_data_size
)) { 
6034                 /* if we can't get the size, or it's different, we need write access */ 
6035                         KAUTH_DEBUG("ATTR - size change, requiring WRITE_DATA"); 
6036                         required_action 
|= KAUTH_VNODE_WRITE_DATA
; 
6040          * Changing timestamps? 
6042          * Note that we are only called to authorize user-requested time changes; 
6043          * side-effect time changes are not authorized.  Authorisation is only 
6044          * required for existing files. 
6046          * Non-owners are not permitted to change the time on an existing 
6047          * file to anything other than the current time. 
6049         if (VATTR_IS_ACTIVE(vap
, va_create_time
) || 
6050             VATTR_IS_ACTIVE(vap
, va_change_time
) || 
6051             VATTR_IS_ACTIVE(vap
, va_modify_time
) || 
6052             VATTR_IS_ACTIVE(vap
, va_access_time
) || 
6053             VATTR_IS_ACTIVE(vap
, va_backup_time
)) { 
6055                  * The owner and root may set any timestamps they like, 
6056                  * provided that the file is not immutable.  The owner still needs 
6057                  * WRITE_ATTRIBUTES (implied by ownership but still deniable). 
6059                 if (has_priv_suser 
|| vauth_node_owner(&ova
, cred
)) { 
6060                         KAUTH_DEBUG("ATTR - root or owner changing timestamps"); 
6061                         required_action 
|= KAUTH_VNODE_CHECKIMMUTABLE 
| KAUTH_VNODE_WRITE_ATTRIBUTES
; 
6063                         /* just setting the current time? */ 
6064                         if (vap
->va_vaflags 
& VA_UTIMES_NULL
) { 
6065                                 KAUTH_DEBUG("ATTR - non-root/owner changing timestamps, requiring WRITE_ATTRIBUTES"); 
6066                                 required_action 
|= KAUTH_VNODE_WRITE_ATTRIBUTES
; 
6068                                 KAUTH_DEBUG("ATTR - ERROR: illegal timestamp modification attempted"); 
6076          * Changing file mode? 
6078         if (VATTR_IS_ACTIVE(vap
, va_mode
) && VATTR_IS_SUPPORTED(&ova
, va_mode
) && (ova
.va_mode 
!= vap
->va_mode
)) { 
6079                 KAUTH_DEBUG("ATTR - mode change from %06o to %06o", ova
.va_mode
, vap
->va_mode
); 
6082                  * Mode changes always have the same basic auth requirements. 
6084                 if (has_priv_suser
) { 
6085                         KAUTH_DEBUG("ATTR - superuser mode change, requiring immutability check"); 
6086                         required_action 
|= KAUTH_VNODE_CHECKIMMUTABLE
; 
6088                         /* need WRITE_SECURITY */ 
6089                         KAUTH_DEBUG("ATTR - non-superuser mode change, requiring WRITE_SECURITY"); 
6090                         required_action 
|= KAUTH_VNODE_WRITE_SECURITY
; 
6094                  * Can't set the setgid bit if you're not in the group and not root.  Have to have 
6095                  * existing group information in the case we're not setting it right now. 
6097                 if (vap
->va_mode 
& S_ISGID
) { 
6098                         required_action 
|= KAUTH_VNODE_CHECKIMMUTABLE
;  /* always required */ 
6099                         if (!has_priv_suser
) { 
6100                                 if (VATTR_IS_ACTIVE(vap
, va_gid
)) { 
6101                                         group 
= vap
->va_gid
; 
6102                                 } else if (VATTR_IS_SUPPORTED(&ova
, va_gid
)) { 
6105                                         KAUTH_DEBUG("ATTR - ERROR: setgid but no gid available"); 
6110                                  * This might be too restrictive; WRITE_SECURITY might be implied by 
6111                                  * membership in this case, rather than being an additional requirement. 
6113                                 if ((error 
= kauth_cred_ismember_gid(cred
, group
, &ismember
)) != 0) { 
6114                                         KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error
, vap
->va_gid
); 
6118                                         KAUTH_DEBUG("  DENIED - can't set SGID bit, not a member of %d", group
); 
6126                  * Can't set the setuid bit unless you're root or the file's owner. 
6128                 if (vap
->va_mode 
& S_ISUID
) { 
6129                         required_action 
|= KAUTH_VNODE_CHECKIMMUTABLE
;  /* always required */ 
6130                         if (!has_priv_suser
) { 
6131                                 if (VATTR_IS_ACTIVE(vap
, va_uid
)) { 
6132                                         owner 
= vap
->va_uid
; 
6133                                 } else if (VATTR_IS_SUPPORTED(&ova
, va_uid
)) { 
6136                                         KAUTH_DEBUG("ATTR - ERROR: setuid but no uid available"); 
6140                                 if (owner 
!= kauth_cred_getuid(cred
)) { 
6142                                          * We could allow this if WRITE_SECURITY is permitted, perhaps. 
6144                                         KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit"); 
6153          * Validate/mask flags changes.  This checks that only the flags in 
6154          * the UF_SETTABLE mask are being set, and preserves the flags in 
6155          * the SF_SETTABLE case. 
6157          * Since flags changes may be made in conjunction with other changes, 
6158          * we will ask the auth code to ignore immutability in the case that 
6159          * the SF_* flags are not set and we are only manipulating the file flags. 
6162         if (VATTR_IS_ACTIVE(vap
, va_flags
)) { 
6163                 /* compute changing flags bits */ 
6164                 if (VATTR_IS_SUPPORTED(&ova
, va_flags
)) { 
6165                         fdelta 
= vap
->va_flags 
^ ova
.va_flags
; 
6167                         fdelta 
= vap
->va_flags
; 
6171                         KAUTH_DEBUG("ATTR - flags changing, requiring WRITE_SECURITY"); 
6172                         required_action 
|= KAUTH_VNODE_WRITE_SECURITY
; 
6174                         /* check that changing bits are legal */ 
6175                         if (has_priv_suser
) { 
6177                                  * The immutability check will prevent us from clearing the SF_* 
6178                                  * flags unless the system securelevel permits it, so just check 
6179                                  * for legal flags here. 
6181                                 if (fdelta 
& ~(UF_SETTABLE 
| SF_SETTABLE
)) { 
6183                                         KAUTH_DEBUG("  DENIED - superuser attempt to set illegal flag(s)"); 
6187                                 if (fdelta 
& ~UF_SETTABLE
) { 
6189                                         KAUTH_DEBUG("  DENIED - user attempt to set illegal flag(s)"); 
6194                          * If the caller has the ability to manipulate file flags, 
6195                          * security is not reduced by ignoring them for this operation. 
6197                          * A more complete test here would consider the 'after' states of the flags 
6198                          * to determine whether it would permit the operation, but this becomes 
6201                          * Ignoring immutability is conditional on securelevel; this does not bypass 
6202                          * the SF_* flags if securelevel > 0. 
6204                         required_action 
|= KAUTH_VNODE_NOIMMUTABLE
; 
6209          * Validate ownership information. 
6218          * Note that if the filesystem didn't give us a UID, we expect that it doesn't 
6219          * support them in general, and will ignore it if/when we try to set it. 
6220          * We might want to clear the uid out of vap completely here. 
6222         if (VATTR_IS_ACTIVE(vap
, va_uid
)) { 
6223                 if (VATTR_IS_SUPPORTED(&ova
, va_uid
) && (vap
->va_uid 
!= ova
.va_uid
)) { 
6224                 if (!has_priv_suser 
&& (kauth_cred_getuid(cred
) != vap
->va_uid
)) { 
6225                         KAUTH_DEBUG("  DENIED - non-superuser cannot change ownershipt to a third party"); 
6236          * Note that if the filesystem didn't give us a GID, we expect that it doesn't 
6237          * support them in general, and will ignore it if/when we try to set it. 
6238          * We might want to clear the gid out of vap completely here. 
6240         if (VATTR_IS_ACTIVE(vap
, va_gid
)) { 
6241                 if (VATTR_IS_SUPPORTED(&ova
, va_gid
) && (vap
->va_gid 
!= ova
.va_gid
)) { 
6242                 if (!has_priv_suser
) { 
6243                         if ((error 
= kauth_cred_ismember_gid(cred
, vap
->va_gid
, &ismember
)) != 0) { 
6244                                 KAUTH_DEBUG("  ERROR - got %d checking for membership in %d", error
, vap
->va_gid
); 
6248                                 KAUTH_DEBUG("  DENIED - group change from %d to %d but not a member of target group", 
6249                                     ova
.va_gid
, vap
->va_gid
); 
6260          * Owner UUID being set or changed. 
6262         if (VATTR_IS_ACTIVE(vap
, va_uuuid
)) { 
6263                 /* if the owner UUID is not actually changing ... */ 
6264                 if (VATTR_IS_SUPPORTED(&ova
, va_uuuid
) && kauth_guid_equal(&vap
->va_uuuid
, &ova
.va_uuuid
)) 
6265                         goto no_uuuid_change
; 
6268                  * The owner UUID cannot be set by a non-superuser to anything other than 
6271                 if (!has_priv_suser
) { 
6272                         if ((error 
= kauth_cred_getguid(cred
, &changer
)) != 0) { 
6273                                 KAUTH_DEBUG("  ERROR - got %d trying to get caller UUID", error
); 
6274                                 /* XXX ENOENT here - no UUID - should perhaps become EPERM */ 
6277                         if (!kauth_guid_equal(&vap
->va_uuuid
, &changer
)) { 
6278                                 KAUTH_DEBUG("  ERROR - cannot set supplied owner UUID - not us"); 
6288          * Group UUID being set or changed. 
6290         if (VATTR_IS_ACTIVE(vap
, va_guuid
)) { 
6291                 /* if the group UUID is not actually changing ... */ 
6292                 if (VATTR_IS_SUPPORTED(&ova
, va_guuid
) && kauth_guid_equal(&vap
->va_guuid
, &ova
.va_guuid
)) 
6293                         goto no_guuid_change
; 
6296                  * The group UUID cannot be set by a non-superuser to anything other than 
6297                  * one of which they are a member. 
6299                 if (!has_priv_suser
) { 
6300                         if ((error 
= kauth_cred_ismember_guid(cred
, &vap
->va_guuid
, &ismember
)) != 0) { 
6301                                 KAUTH_DEBUG("  ERROR - got %d trying to check group membership", error
); 
6305                                 KAUTH_DEBUG("  ERROR - cannot create item with supplied group UUID - not a member"); 
6315          * Compute authorisation for group/ownership changes. 
6317         if (chowner 
|| chgroup 
|| clear_suid 
|| clear_sgid
) { 
6318                 if (has_priv_suser
) { 
6319                         KAUTH_DEBUG("ATTR - superuser changing file owner/group, requiring immutability check"); 
6320                         required_action 
|= KAUTH_VNODE_CHECKIMMUTABLE
; 
6323                                 KAUTH_DEBUG("ATTR - ownership change, requiring TAKE_OWNERSHIP"); 
6324                                 required_action 
|= KAUTH_VNODE_TAKE_OWNERSHIP
; 
6326                         if (chgroup 
&& !chowner
) { 
6327                                 KAUTH_DEBUG("ATTR - group change, requiring WRITE_SECURITY"); 
6328                                 required_action 
|= KAUTH_VNODE_WRITE_SECURITY
; 
6331                         /* clear set-uid and set-gid bits as required by Posix */ 
6332                         if (VATTR_IS_ACTIVE(vap
, va_mode
)) { 
6333                                 newmode 
= vap
->va_mode
; 
6334                         } else if (VATTR_IS_SUPPORTED(&ova
, va_mode
)) { 
6335                                 newmode 
= ova
.va_mode
; 
6337                                 KAUTH_DEBUG("CHOWN - trying to change owner but cannot get mode from filesystem to mask setugid bits"); 
6340                         if (newmode 
& (S_ISUID 
| S_ISGID
)) { 
6341                                 VATTR_SET(vap
, va_mode
, newmode 
& ~(S_ISUID 
| S_ISGID
)); 
6342                                 KAUTH_DEBUG("CHOWN - masking setugid bits from mode %o to %o", newmode
, vap
->va_mode
); 
6348          * Authorise changes in the ACL. 
6350         if (VATTR_IS_ACTIVE(vap
, va_acl
)) { 
6352                 /* no existing ACL */ 
6353                 if (!VATTR_IS_ACTIVE(&ova
, va_acl
) || (ova
.va_acl 
== NULL
)) { 
6356                         if (vap
->va_acl 
!= NULL
) { 
6357                                 required_action 
|= KAUTH_VNODE_WRITE_SECURITY
; 
6358                                 KAUTH_DEBUG("CHMOD - adding ACL"); 
6361                         /* removing an existing ACL */ 
6362                 } else if (vap
->va_acl 
== NULL
) { 
6363                         required_action 
|= KAUTH_VNODE_WRITE_SECURITY
; 
6364                         KAUTH_DEBUG("CHMOD - removing ACL"); 
6366                         /* updating an existing ACL */ 
6368                         if (vap
->va_acl
->acl_entrycount 
!= ova
.va_acl
->acl_entrycount
) { 
6369                                 /* entry count changed, must be different */ 
6370                                 required_action 
|= KAUTH_VNODE_WRITE_SECURITY
; 
6371                                 KAUTH_DEBUG("CHMOD - adding/removing ACL entries"); 
6372                         } else if (vap
->va_acl
->acl_entrycount 
> 0) { 
6373                                 /* both ACLs have the same ACE count, said count is 1 or more, bitwise compare ACLs */ 
6374                                 if (!memcmp(&vap
->va_acl
->acl_ace
[0], &ova
.va_acl
->acl_ace
[0], 
6375                                         sizeof(struct kauth_ace
) * vap
->va_acl
->acl_entrycount
)) { 
6376                                         required_action 
|= KAUTH_VNODE_WRITE_SECURITY
; 
6377                                         KAUTH_DEBUG("CHMOD - changing ACL entries"); 
6384          * Other attributes that require authorisation. 
6386         if (VATTR_IS_ACTIVE(vap
, va_encoding
)) 
6387                 required_action 
|= KAUTH_VNODE_WRITE_ATTRIBUTES
; 
6390         if (VATTR_IS_SUPPORTED(&ova
, va_acl
) && (ova
.va_acl 
!= NULL
)) 
6391                 kauth_acl_free(ova
.va_acl
); 
6393                 *actionp 
= required_action
; 
6399 vfs_setlocklocal(mount_t mp
) 
6404         mp
->mnt_kern_flag 
|= MNTK_LOCK_LOCAL
; 
6407          * We do not expect anyone to be using any vnodes at the 
6408          * time this routine is called. So no need for vnode locking  
6410         TAILQ_FOREACH(vp
, &mp
->mnt_vnodelist
, v_mntvnodes
) { 
6411                         vp
->v_flag 
|= VLOCKLOCAL
; 
6413         TAILQ_FOREACH(vp
, &mp
->mnt_workerqueue
, v_mntvnodes
) { 
6414                         vp
->v_flag 
|= VLOCKLOCAL
; 
6416         TAILQ_FOREACH(vp
, &mp
->mnt_newvnodes
, v_mntvnodes
) { 
6417                         vp
->v_flag 
|= VLOCKLOCAL
; 
6423 vn_setunionwait(vnode_t vp
) 
6425         vnode_lock_spin(vp
); 
6426         vp
->v_flag 
|= VISUNION
; 
6432 vn_checkunionwait(vnode_t vp
) 
6435         while ((vp
->v_flag 
& VISUNION
) == VISUNION
) 
6436                 msleep((caddr_t
)&vp
->v_flag
, &vp
->v_lock
, 0, 0, 0); 
6441 vn_clearunionwait(vnode_t vp
, int locked
) 
6445         if((vp
->v_flag 
& VISUNION
) == VISUNION
) { 
6446                 vp
->v_flag 
&= ~VISUNION
; 
6447                 wakeup((caddr_t
)&vp
->v_flag
); 
6454  * XXX - get "don't trigger mounts" flag for thread; used by autofs. 
6456 extern int thread_notrigger(void); 
6459 thread_notrigger(void) 
6461         struct uthread 
*uth 
= (struct uthread 
*)get_bsdthread_info(current_thread()); 
6462         return (uth
->uu_notrigger
); 
/*
 * Removes orphaned AppleDouble files during a rmdir.
 *
 * 1. vnode_suspend().
 * 2. Call VNOP_READDIR() till the end of directory is reached.
 * 3. Check if the directory entries returned are regular files with name
 *    starting with "._".  If not, return ENOTEMPTY.
 * 4. Continue (2) and (3) till end of directory is reached.
 * 5. If all the entries in the directory were files with "._" name, delete
 *    all the files.
 * 7. If deletion of all files succeeded, call VNOP_RMDIR() again.
 *
 * NOTE(review): the extracted fragment is missing several control-flow
 * lines (error gotos, the cleanup label, local declarations).  They have
 * been reconstructed below to keep the function coherent -- confirm each
 * against the original source before relying on them.
 */
errno_t
rmdir_remove_orphaned_appleDouble(vnode_t vp, vfs_context_t ctx, int *restart_flag)
{

#define UIO_BUFF_SIZE 2048
	uio_t auio = NULL;
	int eofflag, siz = UIO_BUFF_SIZE, nentries = 0;
	int open_flag = 0, full_erase_flag = 0;
	char uio_buf[ UIO_SIZEOF(1) ];
	char *rbuf = NULL, *cpos, *cend;
	struct nameidata nd_temp;
	struct dirent *dp;
	errno_t error;

	/* Block new operations on the directory while we scan and delete. */
	error = vnode_suspend(vp);

	/*
	 * restart_flag is set so that the calling rmdir sleeps and resets
	 */
	if (error == EBUSY)
		*restart_flag = 1;
	if (error != 0)
		goto outsc;

	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
	if (rbuf)
		auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
				&uio_buf[0], sizeof(uio_buf));
	if (!rbuf || !auio) {
		error = ENOMEM;
		goto outsc;
	}

	uio_setoffset(auio, 0);

	eofflag = 0;

	if ((error = VNOP_OPEN(vp, FREAD, ctx)))
		goto outsc;
	else
		open_flag = 1;

	/*
	 * First pass checks if all files are appleDouble files.
	 */

	do {
		siz = UIO_BUFF_SIZE;
		uio_reset(auio, uio_offset(auio), UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, CAST_USER_ADDR_T(rbuf), UIO_BUFF_SIZE);

		if ((error = VNOP_READDIR(vp, auio, 0, &eofflag, &nentries, ctx)))
			goto outsc;

		/* siz becomes the number of dirent bytes actually returned. */
		if (uio_resid(auio) != 0)
			siz -= uio_resid(auio);

		/*
		 * Iterate through directory
		 */
		cpos = rbuf;
		cend = rbuf + siz;
		dp = (struct dirent *) cpos;

		if (cpos == cend)
			eofflag = 1;

		while ((cpos < cend)) {
			/*
			 * Check for . and .. as well as directories
			 */
			if (dp->d_ino != 0 &&
					!((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
					    (dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.'))) {
				/*
				 * Check for irregular files and ._ files
				 * If there is a ._._ file abort the op
				 */
				if ( dp->d_namlen < 2 ||
						strncmp(dp->d_name, "._", 2) ||
						(dp->d_namlen >= 4 && !strncmp(&(dp->d_name[2]), "._", 2))) {
					/* Not an orphaned AppleDouble file: bail out. */
					error = ENOTEMPTY;
					goto outsc;
				}
			}
			/* Advance to the next (variable-length) dirent. */
			cpos += dp->d_reclen;
			dp = (struct dirent *)cpos;
		}

		/*
		 * workaround for HFS/NFS setting eofflag before end of file
		 */
		if (vp->v_tag == VT_HFS && nentries > 2)
			eofflag = 0;

		if (vp->v_tag == VT_NFS) {
			if (eofflag && !full_erase_flag) {
				/* Rescan from offset 0 once to confirm we saw everything. */
				full_erase_flag = 1;
				eofflag = 0;
				uio_reset(auio, 0, UIO_SYSSPACE, UIO_READ);
			}
			else if (!eofflag && full_erase_flag)
				full_erase_flag = 0;
		}

	} while (!eofflag);

	/*
	 * If we've made it here all the files in the dir are AppleDouble.
	 * We can delete the files even though the node is suspended
	 * because we are the owner of the file.
	 */

	uio_reset(auio, 0, UIO_SYSSPACE, UIO_READ);
	eofflag = 0;
	full_erase_flag = 0;

	do {
		siz = UIO_BUFF_SIZE;
		uio_reset(auio, uio_offset(auio), UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, CAST_USER_ADDR_T(rbuf), UIO_BUFF_SIZE);

		error = VNOP_READDIR(vp, auio, 0, &eofflag, &nentries, ctx);

		if (error != 0)
			goto outsc;

		if (uio_resid(auio) != 0)
			siz -= uio_resid(auio);

		/*
		 * Iterate through directory
		 */
		cpos = rbuf;
		cend = rbuf + siz;
		dp = (struct dirent *) cpos;

		if (cpos == cend)
			eofflag = 1;

		while ((cpos < cend)) {
			/*
			 * Check for . and .. as well as directories
			 */
			if (dp->d_ino != 0 &&
					!((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
					    (dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.'))
					  ) {
				/* Unlink the "._" entry relative to the suspended dir. */
				NDINIT(&nd_temp, DELETE, USEDVP, UIO_SYSSPACE, CAST_USER_ADDR_T(dp->d_name), ctx);
				nd_temp.ni_dvp = vp;
				error = unlink1(ctx, &nd_temp, 0);
				/* ENOENT is tolerated: someone else removed it first. */
				if (error && error != ENOENT)
					goto outsc;
			}
			cpos += dp->d_reclen;
			dp = (struct dirent *)cpos;
		}

		/*
		 * workaround for HFS/NFS setting eofflag before end of file
		 */
		if (vp->v_tag == VT_HFS && nentries > 2)
			eofflag = 0;

		if (vp->v_tag == VT_NFS) {
			if (eofflag && !full_erase_flag) {
				full_erase_flag = 1;
				eofflag = 0;
				uio_reset(auio, 0, UIO_SYSSPACE, UIO_READ);
			}
			else if (!eofflag && full_erase_flag)
				full_erase_flag = 0;
		}

	} while (!eofflag);

	error = 0;

outsc:
	/* Common exit: close the dir if we opened it, free buffers, resume. */
	if (open_flag)
		VNOP_CLOSE(vp, FREAD, ctx);

	uio_free(auio);
	FREE(rbuf, M_TEMP);

	vnode_resume(vp);

	return (error);
}
6674 record_vp(vnode_t vp
, int count
) { 
6678         if ((vp
->v_flag 
& VSYSTEM
)) 
6681         ut 
= get_bsdthread_info(current_thread()); 
6682         ut
->uu_iocount 
+= count
; 
6684         if (ut
->uu_vpindex 
< 32) { 
6685                 for (i 
= 0; i 
< ut
->uu_vpindex
; i
++) { 
6686                         if (ut
->uu_vps
[i
] == vp
) 
6689                 ut
->uu_vps
[ut
->uu_vpindex
] = vp
;