2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
25 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
27 * Copyright (c) 1989, 1993
28 * The Regents of the University of California. All rights reserved.
29 * (c) UNIX System Laboratories, Inc.
30 * All or some portions of this file are derived from material licensed
31 * to the University of California by American Telephone and Telegraph
32 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
33 * the permission of UNIX System Laboratories, Inc.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
67 * External virtual filesystem routines
73 #include <sys/param.h>
74 #include <sys/systm.h>
76 #include <sys/mount.h>
78 #include <sys/vnode.h>
80 #include <sys/namei.h>
81 #include <sys/ucred.h>
83 #include <sys/errno.h>
84 #include <sys/malloc.h>
85 #include <sys/domain.h>
87 #include <sys/syslog.h>
90 #include <sys/sysctl.h>
91 #include <sys/filedesc.h>
92 #include <sys/event.h>
95 #include <machine/spl.h>
98 #include <kern/assert.h>
100 #include <miscfs/specfs/specdev.h>
102 #include <mach/mach_types.h>
103 #include <mach/memory_object_types.h>
106 enum vtype iftovt_tab
[16] = {
107 VNON
, VFIFO
, VCHR
, VNON
, VDIR
, VNON
, VBLK
, VNON
,
108 VREG
, VNON
, VLNK
, VNON
, VSOCK
, VNON
, VNON
, VBAD
,
110 int vttoif_tab
[9] = {
111 0, S_IFREG
, S_IFDIR
, S_IFBLK
, S_IFCHR
, S_IFLNK
,
112 S_IFSOCK
, S_IFIFO
, S_IFMT
,
115 static void vfree(struct vnode
*vp
);
116 static void vinactive(struct vnode
*vp
);
117 static int vnreclaim(int count
);
119 adjust_vm_object_cache(vm_size_t oval
, vm_size_t nval
);
121 TAILQ_HEAD(freelst
, vnode
) vnode_free_list
; /* vnode free list */
122 TAILQ_HEAD(inactivelst
, vnode
) vnode_inactive_list
; /* vnode inactive list */
123 struct mntlist mountlist
; /* mounted filesystem list */
126 #define VLISTCHECK(fun, vp, list) \
127 if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
128 panic("%s: %s vnode not on %slist", (fun), (list), (list));
130 #define VINACTIVECHECK(fun, vp, expected) \
132 int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE); \
133 if (__is_inactive ^ expected) \
134 panic("%s: %sinactive vnode, expected %s", (fun), \
135 __is_inactive? "" : "not ", \
136 expected? "inactive": "not inactive"); \
139 #define VLISTCHECK(fun, vp, list)
140 #define VINACTIVECHECK(fun, vp, expected)
141 #endif /* DIAGNOSTIC */
143 #define VLISTNONE(vp) \
145 (vp)->v_freelist.tqe_next = (struct vnode *)0; \
146 (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb; \
149 #define VONLIST(vp) \
150 ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
152 /* remove a vnode from free vnode list */
153 #define VREMFREE(fun, vp) \
155 VLISTCHECK((fun), (vp), "free"); \
156 TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist); \
161 /* remove a vnode from inactive vnode list */
162 #define VREMINACTIVE(fun, vp) \
164 VLISTCHECK((fun), (vp), "inactive"); \
165 VINACTIVECHECK((fun), (vp), VUINACTIVE); \
166 TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \
167 CLR((vp)->v_flag, VUINACTIVE); \
172 #define VORECLAIM_ENABLE(vp) \
174 if (ISSET((vp)->v_flag, VORECLAIM)) \
175 panic("vm_object_reclaim already"); \
176 SET((vp)->v_flag, VORECLAIM); \
179 #define VORECLAIM_DISABLE(vp) \
181 CLR((vp)->v_flag, VORECLAIM); \
182 if (ISSET((vp)->v_flag, VXWANT)) { \
183 CLR((vp)->v_flag, VXWANT); \
184 wakeup((caddr_t)(vp)); \
189 * Have to declare first two locks as actual data even if !MACH_SLOCKS, since
190 * a pointers to them get passed around.
192 simple_lock_data_t mountlist_slock
;
193 simple_lock_data_t mntvnode_slock
;
194 decl_simple_lock_data(,mntid_slock
);
195 decl_simple_lock_data(,vnode_free_list_slock
);
196 decl_simple_lock_data(,spechash_slock
);
199 * vnodetarget is the amount of vnodes we expect to get back
200 * from the the inactive vnode list and VM object cache.
201 * As vnreclaim() is a mainly cpu bound operation for faster
202 * processers this number could be higher.
203 * Having this number too high introduces longer delays in
204 * the execution of getnewvnode().
206 unsigned long vnodetarget
; /* target for vnreclaim() */
207 #define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */
210 * We need quite a few vnodes on the free list to sustain the
211 * rapid stat() the compilation process does, and still benefit from the name
212 * cache. Having too few vnodes on the free list causes serious disk
213 * thrashing as we cycle through them.
215 #define VNODE_FREE_MIN 300 /* freelist should have at least these many */
218 * We need to get vnodes back from the VM object cache when a certain #
219 * of vnodes are reused from the freelist. This is essential for the
220 * caching to be effective in the namecache and the buffer cache [for the
223 #define VNODE_TOOMANY_REUSED (VNODE_FREE_MIN/4)
226 * If we have enough vnodes on the freelist we do not want to reclaim
227 * the vnodes from the VM object cache.
229 #define VNODE_FREE_ENOUGH (VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
232 * Initialize the vnode management data structures.
234 __private_extern__
void
237 extern struct lock__bsd__ exchangelock
;
239 simple_lock_init(&mountlist_slock
);
240 simple_lock_init(&mntvnode_slock
);
241 simple_lock_init(&mntid_slock
);
242 simple_lock_init(&spechash_slock
);
243 TAILQ_INIT(&vnode_free_list
);
244 simple_lock_init(&vnode_free_list_slock
);
245 TAILQ_INIT(&vnode_inactive_list
);
246 CIRCLEQ_INIT(&mountlist
);
247 lockinit(&exchangelock
, PVFS
, "exchange", 0, 0);
250 vnodetarget
= VNODE_FREE_TARGET
;
253 * Scale the vm_object_cache to accomodate the vnodes
256 (void) adjust_vm_object_cache(0, desiredvnodes
- VNODE_FREE_MIN
);
259 /* Reset the VM Object Cache with the values passed in */
260 __private_extern__ kern_return_t
261 reset_vmobjectcache(unsigned int val1
, unsigned int val2
)
263 vm_size_t oval
= val1
- VNODE_FREE_MIN
;
266 if(val2
< VNODE_FREE_MIN
)
269 nval
= val2
- VNODE_FREE_MIN
;
271 return(adjust_vm_object_cache(oval
, nval
));
275 * Mark a mount point as busy. Used to synchronize access and to delay
276 * unmounting. Interlock is not released on failure.
279 vfs_busy(mp
, flags
, interlkp
, p
)
282 struct slock
*interlkp
;
287 if (mp
->mnt_kern_flag
& MNTK_UNMOUNT
) {
288 if (flags
& LK_NOWAIT
)
290 mp
->mnt_kern_flag
|= MNTK_MWAIT
;
292 simple_unlock(interlkp
);
294 * Since all busy locks are shared except the exclusive
295 * lock granted when unmounting, the only place that a
296 * wakeup needs to be done is at the release of the
297 * exclusive lock at the end of dounmount.
299 sleep((caddr_t
)mp
, PVFS
);
301 simple_lock(interlkp
);
306 lkflags
|= LK_INTERLOCK
;
307 if (lockmgr(&mp
->mnt_lock
, lkflags
, interlkp
, p
))
308 panic("vfs_busy: unexpected lock failure");
313 * Free a busy filesystem.
321 lockmgr(&mp
->mnt_lock
, LK_RELEASE
, NULL
, p
);
325 * Lookup a filesystem type, and if found allocate and initialize
326 * a mount structure for it.
328 * Devname is usually updated by mount(8) after booting.
331 vfs_rootmountalloc(fstypename
, devname
, mpp
)
336 struct proc
*p
= current_proc(); /* XXX */
337 struct vfsconf
*vfsp
;
340 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
341 if (!strcmp(vfsp
->vfc_name
, fstypename
))
345 mp
= _MALLOC_ZONE((u_long
)sizeof(struct mount
), M_MOUNT
, M_WAITOK
);
346 bzero((char *)mp
, (u_long
)sizeof(struct mount
));
348 /* Initialize the default IO constraints */
349 mp
->mnt_maxreadcnt
= mp
->mnt_maxwritecnt
= MAXPHYS
;
350 mp
->mnt_segreadcnt
= mp
->mnt_segwritecnt
= 32;
352 lockinit(&mp
->mnt_lock
, PVFS
, "vfslock", 0, 0);
353 (void)vfs_busy(mp
, LK_NOWAIT
, 0, p
);
354 LIST_INIT(&mp
->mnt_vnodelist
);
356 mp
->mnt_op
= vfsp
->vfc_vfsops
;
357 mp
->mnt_flag
= MNT_RDONLY
;
358 mp
->mnt_vnodecovered
= NULLVP
;
359 vfsp
->vfc_refcount
++;
360 mp
->mnt_stat
.f_type
= vfsp
->vfc_typenum
;
361 mp
->mnt_flag
|= vfsp
->vfc_flags
& MNT_VISFLAGMASK
;
362 strncpy(mp
->mnt_stat
.f_fstypename
, vfsp
->vfc_name
, MFSNAMELEN
);
363 mp
->mnt_stat
.f_mntonname
[0] = '/';
364 (void) copystr(devname
, mp
->mnt_stat
.f_mntfromname
, MNAMELEN
- 1, 0);
370 * Find an appropriate filesystem to use for the root. If a filesystem
371 * has not been preselected, walk through the list of known filesystems
372 * trying those that have mountroot routines, and try them until one
373 * works or we have tried them all.
378 struct vfsconf
*vfsp
;
379 extern int (*mountroot
)(void);
382 if (mountroot
!= NULL
) {
383 error
= (*mountroot
)();
387 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
) {
388 if (vfsp
->vfc_mountroot
== NULL
)
390 if ((error
= (*vfsp
->vfc_mountroot
)()) == 0)
393 printf("%s_mountroot failed: %d\n", vfsp
->vfc_name
, error
);
399 * Lookup a mount point by filesystem identifier.
405 register struct mount
*mp
;
407 simple_lock(&mountlist_slock
);
408 CIRCLEQ_FOREACH(mp
, &mountlist
, mnt_list
) {
409 if (mp
->mnt_stat
.f_fsid
.val
[0] == fsid
->val
[0] &&
410 mp
->mnt_stat
.f_fsid
.val
[1] == fsid
->val
[1]) {
411 simple_unlock(&mountlist_slock
);
415 simple_unlock(&mountlist_slock
);
416 return ((struct mount
*)0);
420 * Get a new unique fsid
426 static u_short xxxfs_mntid
;
431 simple_lock(&mntid_slock
);
432 mtype
= mp
->mnt_vfc
->vfc_typenum
;
433 mp
->mnt_stat
.f_fsid
.val
[0] = makedev(nblkdev
+ mtype
, 0);
434 mp
->mnt_stat
.f_fsid
.val
[1] = mtype
;
435 if (xxxfs_mntid
== 0)
437 tfsid
.val
[0] = makedev(nblkdev
+ mtype
, xxxfs_mntid
);
438 tfsid
.val
[1] = mtype
;
439 if (!CIRCLEQ_EMPTY(&mountlist
)) {
440 while (vfs_getvfs(&tfsid
)) {
445 mp
->mnt_stat
.f_fsid
.val
[0] = tfsid
.val
[0];
446 simple_unlock(&mntid_slock
);
450 * Set vnode attributes to VNOVAL
454 register struct vattr
*vap
;
458 vap
->va_size
= vap
->va_bytes
= VNOVAL
;
459 vap
->va_mode
= vap
->va_nlink
= vap
->va_uid
= vap
->va_gid
=
460 vap
->va_fsid
= vap
->va_fileid
=
461 vap
->va_blocksize
= vap
->va_rdev
=
462 vap
->va_atime
.tv_sec
= vap
->va_atime
.tv_nsec
=
463 vap
->va_mtime
.tv_sec
= vap
->va_mtime
.tv_nsec
=
464 vap
->va_ctime
.tv_sec
= vap
->va_ctime
.tv_nsec
=
465 vap
->va_flags
= vap
->va_gen
= VNOVAL
;
470 * Routines having to do with the management of the vnode table.
472 extern int (**dead_vnodeop_p
)(void *);
473 static void vclean
__P((struct vnode
*vp
, int flag
, struct proc
*p
));
474 extern void vgonel
__P((struct vnode
*vp
, struct proc
*p
));
475 long numvnodes
, freevnodes
;
477 long vnode_reclaim_tried
;
478 long vnode_objects_reclaimed
;
481 extern struct vattr va_null
;
484 * Return the next vnode from the free list.
487 getnewvnode(tag
, mp
, vops
, vpp
)
490 int (**vops
)(void *);
493 struct proc
*p
= current_proc(); /* XXX */
495 int cnt
, didretry
= 0;
496 static int reused
= 0; /* track the reuse rate */
500 simple_lock(&vnode_free_list_slock
);
502 * MALLOC a vnode if the number of vnodes has not reached the desired
503 * value and the number on the free list is still reasonable...
504 * reuse from the freelist even though we may evict a name cache entry
505 * to reduce the number of vnodes that accumulate.... vnodes tie up
506 * wired memory and are never garbage collected
508 if (numvnodes
< desiredvnodes
&& (freevnodes
< (2 * VNODE_FREE_MIN
))) {
510 simple_unlock(&vnode_free_list_slock
);
511 MALLOC_ZONE(vp
, struct vnode
*, sizeof *vp
, M_VNODE
, M_WAITOK
);
512 bzero((char *)vp
, sizeof *vp
);
513 VLISTNONE(vp
); /* avoid double queue removal */
514 simple_lock_init(&vp
->v_interlock
);
519 * Once the desired number of vnodes are allocated,
520 * we start reusing the vnodes.
522 if (freevnodes
< VNODE_FREE_MIN
) {
524 * if we are low on vnodes on the freelist attempt to get
525 * some back from the inactive list and VM object cache
527 simple_unlock(&vnode_free_list_slock
);
528 (void)vnreclaim(vnodetarget
);
529 simple_lock(&vnode_free_list_slock
);
531 if (numvnodes
>= desiredvnodes
&& reused
> VNODE_TOOMANY_REUSED
) {
533 if (freevnodes
< VNODE_FREE_ENOUGH
) {
534 simple_unlock(&vnode_free_list_slock
);
535 (void)vnreclaim(vnodetarget
);
536 simple_lock(&vnode_free_list_slock
);
540 for (cnt
= 0, vp
= vnode_free_list
.tqh_first
;
541 vp
!= NULLVP
; cnt
++, vp
= vp
->v_freelist
.tqe_next
) {
542 if (simple_lock_try(&vp
->v_interlock
)) {
543 /* got the interlock */
544 if (ISSET(vp
->v_flag
, VORECLAIM
)) {
545 /* skip over the vnodes that are being reclaimed */
546 simple_unlock(&vp
->v_interlock
);
554 * Unless this is a bad time of the month, at most
555 * the first NCPUS items on the free list are
556 * locked, so this is close enough to being empty.
559 simple_unlock(&vnode_free_list_slock
);
560 if (!(didretry
++) && (vnreclaim(vnodetarget
) > 0))
563 log(LOG_EMERG
, "%d vnodes locked, %d desired, %d numvnodes, "
564 "%d free, %d inactive, %d being reclaimed\n",
565 cnt
, desiredvnodes
, numvnodes
, freevnodes
, inactivevnodes
,
572 panic("free vnode isn't: v_type = %d, v_usecount = %d?",
573 vp
->v_type
, vp
->v_usecount
);
575 VREMFREE("getnewvnode", vp
);
577 simple_unlock(&vnode_free_list_slock
);
580 if (vp
->v_type
!= VBAD
)
581 vgonel(vp
, p
); /* clean and reclaim the vnode */
583 simple_unlock(&vp
->v_interlock
);
586 panic("cleaned vnode isn't");
590 panic("Clean vnode has pending I/O's");
594 if (UBCINFOEXISTS(vp
))
595 panic("getnewvnode: ubcinfo not cleaned");
599 if (vp
->v_flag
& VHASDIRTY
)
602 // make sure all these fields are cleared out as the
603 // name/parent stuff uses them and assumes they're
604 // cleared to null/0.
605 if (vp
->v_scmap
!= NULL
) {
606 panic("getnewvnode: vp @ 0x%x has non-null scmap.\n", vp
);
608 vp
->v_un
.vu_name
= NULL
;
610 vp
->v_un1
.v_cl
.v_pad
= 0;
620 /* we may have blocked, re-evaluate state */
621 simple_lock(&vnode_free_list_slock
);
623 if (vp
->v_usecount
== 0)
624 VREMFREE("getnewvnode", vp
);
625 else if (ISSET((vp
)->v_flag
, VUINACTIVE
))
626 VREMINACTIVE("getnewvnode", vp
);
628 simple_unlock(&vnode_free_list_slock
);
631 vp
->v_flag
= VSTANDARD
;
643 * Move a vnode from one mount queue to another.
651 simple_lock(&mntvnode_slock
);
653 * Delete from old mount point vnode list, if on one.
655 if (vp
->v_mount
!= NULL
)
656 LIST_REMOVE(vp
, v_mntvnodes
);
658 * Insert into list of vnodes for the new mount point, if available.
660 if ((vp
->v_mount
= mp
) != NULL
)
661 LIST_INSERT_HEAD(&mp
->mnt_vnodelist
, vp
, v_mntvnodes
);
662 simple_unlock(&mntvnode_slock
);
666 vpwakeup(struct vnode
*vp
)
669 if (--vp
->v_numoutput
< 0)
670 panic("vpwakeup: neg numoutput");
671 if ((vp
->v_flag
& VBWAIT
|| vp
->v_flag
& VTHROTTLED
)
672 && vp
->v_numoutput
<= 0) {
673 vp
->v_flag
&= ~(VBWAIT
|VTHROTTLED
);
674 wakeup((caddr_t
)&vp
->v_numoutput
);
680 * Update outstanding I/O count and do wakeup if requested.
684 register struct buf
*bp
;
686 CLR(bp
->b_flags
, B_WRITEINPROG
);
691 * Flush out and invalidate all buffers associated with a vnode.
692 * Called with the underlying object locked.
695 vinvalbuf(vp
, flags
, cred
, p
, slpflag
, slptimeo
)
696 register struct vnode
*vp
;
700 int slpflag
, slptimeo
;
702 register struct buf
*bp
;
703 struct buf
*nbp
, *blist
;
706 if (flags
& V_SAVE
) {
707 if (error
= VOP_FSYNC(vp
, cred
, MNT_WAIT
, p
)) {
710 if (vp
->v_dirtyblkhd
.lh_first
)
711 panic("vinvalbuf: dirty bufs (vp 0x%x, bp 0x%x)", vp
, vp
->v_dirtyblkhd
.lh_first
);
715 if ((blist
= vp
->v_cleanblkhd
.lh_first
) && (flags
& V_SAVEMETA
))
716 while (blist
&& blist
->b_lblkno
< 0)
717 blist
= blist
->b_vnbufs
.le_next
;
718 if (!blist
&& (blist
= vp
->v_dirtyblkhd
.lh_first
) &&
719 (flags
& V_SAVEMETA
))
720 while (blist
&& blist
->b_lblkno
< 0)
721 blist
= blist
->b_vnbufs
.le_next
;
725 for (bp
= blist
; bp
; bp
= nbp
) {
726 nbp
= bp
->b_vnbufs
.le_next
;
727 if ((flags
& V_SAVEMETA
) && bp
->b_lblkno
< 0)
730 if (ISSET(bp
->b_flags
, B_BUSY
)) {
731 SET(bp
->b_flags
, B_WANTED
);
732 error
= tsleep((caddr_t
)bp
,
733 slpflag
| (PRIBIO
+ 1), "vinvalbuf",
742 SET(bp
->b_flags
, B_BUSY
);
745 * XXX Since there are no node locks for NFS, I believe
746 * there is a slight chance that a delayed write will
747 * occur while sleeping just above, so check for it.
749 if (ISSET(bp
->b_flags
, B_DELWRI
) && (flags
& V_SAVE
)) {
750 (void) VOP_BWRITE(bp
);
754 if (bp
->b_flags
& B_LOCKED
) {
755 panic("vinvalbuf: bp @ 0x%x is locked!", bp
);
758 SET(bp
->b_flags
, B_INVAL
);
763 if (!(flags
& V_SAVEMETA
) &&
764 (vp
->v_dirtyblkhd
.lh_first
|| vp
->v_cleanblkhd
.lh_first
))
765 panic("vinvalbuf: flush failed");
770 * Create a vnode for a block device.
771 * Used for root filesystem, argdev, and swap areas.
772 * Also used for memory file system special devices.
779 register struct vnode
*vp
;
787 error
= getnewvnode(VT_NON
, (struct mount
*)0, spec_vnodeop_p
, &nvp
);
794 if (nvp
= checkalias(vp
, dev
, (struct mount
*)0)) {
803 * Check to see if the new vnode represents a special device
804 * for which we already have a vnode (either because of
805 * bdevvp() or because of a different vnode representing
806 * the same block device). If such an alias exists, deallocate
807 * the existing contents and return the aliased vnode. The
808 * caller is responsible for filling it with its new contents.
811 checkalias(nvp
, nvp_rdev
, mp
)
812 register struct vnode
*nvp
;
816 struct proc
*p
= current_proc(); /* XXX */
819 struct specinfo
*specinfop
;
821 if (nvp
->v_type
!= VBLK
&& nvp
->v_type
!= VCHR
)
824 MALLOC_ZONE(specinfop
, struct specinfo
*, sizeof(struct specinfo
),
825 M_SPECINFO
, M_WAITOK
);
826 vpp
= &speclisth
[SPECHASH(nvp_rdev
)];
828 simple_lock(&spechash_slock
);
829 for (vp
= *vpp
; vp
; vp
= vp
->v_specnext
) {
830 if (nvp_rdev
!= vp
->v_rdev
|| nvp
->v_type
!= vp
->v_type
)
833 * Alias, but not in use, so flush it out.
835 simple_lock(&vp
->v_interlock
);
836 if (vp
->v_usecount
== 0) {
837 simple_unlock(&spechash_slock
);
841 if (vget(vp
, LK_EXCLUSIVE
| LK_INTERLOCK
, p
)) {
842 simple_unlock(&spechash_slock
);
847 if (vp
== NULL
|| vp
->v_tag
!= VT_NON
) {
848 nvp
->v_specinfo
= specinfop
;
849 specinfop
= 0; /* buffer used */
850 bzero(nvp
->v_specinfo
, sizeof(struct specinfo
));
851 nvp
->v_rdev
= nvp_rdev
;
852 nvp
->v_hashchain
= vpp
;
853 nvp
->v_specnext
= *vpp
;
854 nvp
->v_specflags
= 0;
855 simple_unlock(&spechash_slock
);
858 nvp
->v_flag
|= VALIASED
;
859 vp
->v_flag
|= VALIASED
;
862 /* Since buffer is used just return */
865 simple_unlock(&spechash_slock
);
866 VOP_UNLOCK(vp
, 0, p
);
867 simple_lock(&vp
->v_interlock
);
869 vp
->v_op
= nvp
->v_op
;
870 vp
->v_tag
= nvp
->v_tag
;
874 FREE_ZONE((void *)specinfop
, sizeof(struct specinfo
), M_SPECINFO
);
879 * Get a reference on a particular vnode and lock it if requested.
880 * If the vnode was on the inactive list, remove it from the list.
881 * If the vnode was on the free list, remove it from the list and
882 * move it to inactive list as needed.
883 * The vnode lock bit is set if the vnode is being eliminated in
884 * vgone. The process is awakened when the transition is completed,
885 * and an error returned to indicate that the vnode is no longer
886 * usable (possibly having been changed to a new file system type).
897 vpid
= vp
->v_id
; // save off the original v_id
902 * If the vnode is in the process of being cleaned out for
903 * another use, we wait for the cleaning to finish and then
904 * return failure. Cleaning is determined by checking that
905 * the VXLOCK flag is set.
907 if ((flags
& LK_INTERLOCK
) == 0)
908 simple_lock(&vp
->v_interlock
);
909 if ((vp
->v_flag
& VXLOCK
) || (vp
->v_flag
& VORECLAIM
)) {
910 vp
->v_flag
|= VXWANT
;
911 simple_unlock(&vp
->v_interlock
);
912 (void)tsleep((caddr_t
)vp
, PINOD
, "vget", 0);
917 * vnode is being terminated.
918 * wait for vnode_pager_no_senders() to clear VTERMINATE
920 if (ISSET(vp
->v_flag
, VTERMINATE
)) {
921 SET(vp
->v_flag
, VTERMWANT
);
922 simple_unlock(&vp
->v_interlock
);
923 (void)tsleep((caddr_t
)&vp
->v_ubcinfo
, PINOD
, "vget1", 0);
928 * if the vnode is being initialized,
929 * wait for it to finish initialization
931 if (ISSET(vp
->v_flag
, VUINIT
)) {
932 SET(vp
->v_flag
, VUWANT
);
933 simple_unlock(&vp
->v_interlock
);
934 (void) tsleep((caddr_t
)vp
, PINOD
, "vget2", 0);
938 simple_lock(&vnode_free_list_slock
);
940 if (vp
->v_usecount
== 0)
941 VREMFREE("vget", vp
);
942 else if (ISSET((vp
)->v_flag
, VUINACTIVE
))
943 VREMINACTIVE("vget", vp
);
945 simple_unlock(&vnode_free_list_slock
);
947 if (++vp
->v_usecount
<= 0)
948 panic("vget: v_usecount");
951 * Recover named reference as needed
953 if (UBCISVALID(vp
) && !ubc_issetflags(vp
, UI_HASOBJREF
)) {
954 simple_unlock(&vp
->v_interlock
);
955 if (ubc_getobject(vp
, UBC_HOLDOBJECT
) == MEMORY_OBJECT_CONTROL_NULL
) {
959 simple_lock(&vp
->v_interlock
);
962 if (flags
& LK_TYPE_MASK
) {
963 if (error
= vn_lock(vp
, flags
| LK_INTERLOCK
, p
))
965 if (vpid
!= vp
->v_id
) { // make sure it's still the same vnode
972 if ((flags
& LK_INTERLOCK
) == 0)
973 simple_unlock(&vp
->v_interlock
);
975 if (vpid
!= vp
->v_id
) { // make sure it's still the same vnode
983 simple_lock(&vp
->v_interlock
);
986 * we may have blocked. Re-evaluate the state
988 simple_lock(&vnode_free_list_slock
);
990 if (vp
->v_usecount
== 0)
991 VREMFREE("vget", vp
);
992 else if (ISSET((vp
)->v_flag
, VUINACTIVE
))
993 VREMINACTIVE("vget", vp
);
995 simple_unlock(&vnode_free_list_slock
);
998 * If the vnode was not active in the first place
999 * must not call vrele() as VOP_INACTIVE() is not
1001 * So inlined part of vrele() here.
1003 if (--vp
->v_usecount
== 1) {
1004 if (UBCINFOEXISTS(vp
)) {
1006 simple_unlock(&vp
->v_interlock
);
1010 if (vp
->v_usecount
> 0) {
1011 simple_unlock(&vp
->v_interlock
);
1014 if (vp
->v_usecount
< 0)
1015 panic("vget: negative usecount (%d)", vp
->v_usecount
);
1017 simple_unlock(&vp
->v_interlock
);
1022 * Get a pager reference on the particular vnode.
1024 * This is called from ubc_info_init() and it is asumed that
1025 * the vnode is not on the free list.
1026 * It is also assumed that the vnode is neither being recycled
1027 * by vgonel nor being terminated by vnode_pager_vrele().
1029 * The vnode interlock is NOT held by the caller.
1031 __private_extern__
int
1032 vnode_pager_vget(vp
)
1035 simple_lock(&vp
->v_interlock
);
1037 UBCINFOCHECK("vnode_pager_vget", vp
);
1039 if (ISSET(vp
->v_flag
, (VXLOCK
|VORECLAIM
|VTERMINATE
)))
1040 panic("%s: dying vnode", "vnode_pager_vget");
1042 simple_lock(&vnode_free_list_slock
);
1043 /* The vnode should not be on free list */
1045 if (vp
->v_usecount
== 0)
1046 panic("%s: still on list", "vnode_pager_vget");
1047 else if (ISSET((vp
)->v_flag
, VUINACTIVE
))
1048 VREMINACTIVE("vnode_pager_vget", vp
);
1051 /* The vnode should not be on the inactive list here */
1052 simple_unlock(&vnode_free_list_slock
);
1054 /* After all those checks, now do the real work :-) */
1055 if (++vp
->v_usecount
<= 0)
1056 panic("vnode_pager_vget: v_usecount");
1057 simple_unlock(&vp
->v_interlock
);
1063 * Stubs to use when there is no locking to be done on the underlying object.
1064 * A minimal shared lock is necessary to ensure that the underlying object
1065 * is not revoked while an operation is in progress. So, an active shared
1066 * count is maintained in an auxillary vnode lock structure.
1070 struct vop_lock_args
/* {
1078 * This code cannot be used until all the non-locking filesystems
1079 * (notably NFS) are converted to properly lock and release nodes.
1080 * Also, certain vnode operations change the locking state within
1081 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
1082 * and symlink). Ideally these operations should not change the
1083 * lock state, but should be changed to let the caller of the
1084 * function unlock them. Otherwise all intermediate vnode layers
1085 * (such as union, umapfs, etc) must catch these functions to do
1086 * the necessary locking at their layer. Note that the inactive
1087 * and lookup operations also change their lock state, but this
1088 * cannot be avoided, so these two operations will always need
1089 * to be handled in intermediate layers.
1091 struct vnode
*vp
= ap
->a_vp
;
1092 int vnflags
, flags
= ap
->a_flags
;
1094 if (vp
->v_vnlock
== NULL
) {
1095 if ((flags
& LK_TYPE_MASK
) == LK_DRAIN
)
1097 MALLOC(vp
->v_vnlock
, struct lock__bsd__
*,
1098 sizeof(struct lock__bsd__
), M_TEMP
, M_WAITOK
);
1099 lockinit(vp
->v_vnlock
, PVFS
, "vnlock", 0, 0);
1101 switch (flags
& LK_TYPE_MASK
) {
1107 vnflags
= LK_SHARED
;
1110 case LK_EXCLUPGRADE
:
1115 panic("vop_nolock: bad operation %d", flags
& LK_TYPE_MASK
);
1117 if (flags
& LK_INTERLOCK
)
1118 vnflags
|= LK_INTERLOCK
;
1119 return(lockmgr(vp
->v_vnlock
, vnflags
, &vp
->v_interlock
, ap
->a_p
));
1122 * Since we are not using the lock manager, we must clear
1123 * the interlock here.
1125 if (ap
->a_flags
& LK_INTERLOCK
)
1126 simple_unlock(&ap
->a_vp
->v_interlock
);
1132 * Decrement the active use count.
1136 struct vop_unlock_args
/* {
1142 struct vnode
*vp
= ap
->a_vp
;
1144 if (vp
->v_vnlock
== NULL
)
1146 return (lockmgr(vp
->v_vnlock
, LK_RELEASE
, NULL
, ap
->a_p
));
1150 * Return whether or not the node is in use.
1154 struct vop_islocked_args
/* {
1158 struct vnode
*vp
= ap
->a_vp
;
1160 if (vp
->v_vnlock
== NULL
)
1162 return (lockstatus(vp
->v_vnlock
));
1173 simple_lock(&vp
->v_interlock
);
1174 if (vp
->v_usecount
<= 0)
1175 panic("vref used where vget required");
1177 /* If on the inactive list, remove it from there */
1178 simple_lock(&vnode_free_list_slock
);
1179 if (ISSET((vp
)->v_flag
, VUINACTIVE
))
1180 VREMINACTIVE("vref", vp
);
1181 simple_unlock(&vnode_free_list_slock
);
1183 if (++vp
->v_usecount
<= 0)
1184 panic("vref v_usecount");
1185 simple_unlock(&vp
->v_interlock
);
1189 clean_up_name_parent_ptrs(struct vnode
*vp
)
1191 if (VNAME(vp
) || VPARENT(vp
)) {
1195 // do it this way so we don't block before clearing
1214 * put the vnode on appropriate free list.
1215 * called with v_interlock held.
1222 extern int disable_funnel
;
1224 if ((curflock
= thread_funnel_get()) != kernel_flock
&&
1225 !(disable_funnel
&& curflock
!= THR_FUNNEL_NULL
))
1226 panic("Entering vfree() without kernel funnel");
1229 * if the vnode is not obtained by calling getnewvnode() we
1230 * are not responsible for the cleanup. Just return.
1232 if (!(vp
->v_flag
& VSTANDARD
)) {
1236 if (vp
->v_usecount
!= 0)
1237 panic("vfree: v_usecount");
1239 /* insert at tail of LRU list or at head if VAGE is set */
1240 simple_lock(&vnode_free_list_slock
);
1242 // make sure the name & parent pointers get cleared out
1243 // clean_up_name_parent_ptrs(vp);
1246 panic("%s: vnode still on list", "vfree");
1248 if (vp
->v_flag
& VAGE
) {
1249 TAILQ_INSERT_HEAD(&vnode_free_list
, vp
, v_freelist
);
1250 vp
->v_flag
&= ~VAGE
;
1252 TAILQ_INSERT_TAIL(&vnode_free_list
, vp
, v_freelist
);
1254 simple_unlock(&vnode_free_list_slock
);
1259 * put the vnode on the inactive list.
1260 * called with v_interlock held
1267 extern int disable_funnel
;
1269 if ((curflock
= thread_funnel_get()) != kernel_flock
&&
1270 !(disable_funnel
&& curflock
!= THR_FUNNEL_NULL
))
1271 panic("Entering vinactive() without kernel funnel");
1273 if (!UBCINFOEXISTS(vp
))
1274 panic("vinactive: not a UBC vnode");
1276 if (vp
->v_usecount
!= 1)
1277 panic("vinactive: v_usecount");
1279 simple_lock(&vnode_free_list_slock
);
1282 panic("%s: vnode still on list", "vinactive");
1283 VINACTIVECHECK("vinactive", vp
, 0);
1285 TAILQ_INSERT_TAIL(&vnode_inactive_list
, vp
, v_freelist
);
1286 SET(vp
->v_flag
, VUINACTIVE
);
1287 CLR(vp
->v_flag
, (VNOCACHE_DATA
| VRAOFF
));
1290 simple_unlock(&vnode_free_list_slock
);
1296 * vput(), just unlock and vrele()
1302 struct proc
*p
= current_proc(); /* XXX */
1304 simple_lock(&vp
->v_interlock
);
1305 if (--vp
->v_usecount
== 1) {
1306 if (UBCINFOEXISTS(vp
)) {
1308 simple_unlock(&vp
->v_interlock
);
1309 VOP_UNLOCK(vp
, 0, p
);
1313 if (vp
->v_usecount
> 0) {
1314 simple_unlock(&vp
->v_interlock
);
1315 VOP_UNLOCK(vp
, 0, p
);
1319 if (vp
->v_usecount
< 0 || vp
->v_writecount
!= 0) {
1320 vprint("vput: bad ref count", vp
);
1321 panic("vput: v_usecount = %d, v_writecount = %d",
1322 vp
->v_usecount
, vp
->v_writecount
);
1325 simple_lock(&vnode_free_list_slock
);
1326 if (ISSET((vp
)->v_flag
, VUINACTIVE
))
1327 VREMINACTIVE("vref", vp
);
1328 simple_unlock(&vnode_free_list_slock
);
1330 simple_unlock(&vp
->v_interlock
);
1331 VOP_INACTIVE(vp
, p
);
1333 * The interlock is not held and
1334 * VOP_INCATIVE releases the vnode lock.
1335 * We could block and the vnode might get reactivated
1336 * Can not just call vfree without checking the state
1338 simple_lock(&vp
->v_interlock
);
1340 if (vp
->v_usecount
== 0)
1342 else if ((vp
->v_usecount
== 1) && UBCINFOEXISTS(vp
))
1345 simple_unlock(&vp
->v_interlock
);
1350 * If count drops to zero, call inactive routine and return to freelist.
1356 struct proc
*p
= current_proc(); /* XXX */
1358 extern int disable_funnel
;
1360 if ((curflock
= thread_funnel_get()) != kernel_flock
&&
1361 !(disable_funnel
&& curflock
!= THR_FUNNEL_NULL
))
1362 panic("Entering vrele() without kernel funnel");
1364 simple_lock(&vp
->v_interlock
);
1365 if (--vp
->v_usecount
== 1) {
1366 if (UBCINFOEXISTS(vp
)) {
1367 if ((vp
->v_flag
& VXLOCK
) == 0)
1369 simple_unlock(&vp
->v_interlock
);
1373 if (vp
->v_usecount
> 0) {
1374 simple_unlock(&vp
->v_interlock
);
1378 if (vp
->v_usecount
< 0 || vp
->v_writecount
!= 0) {
1379 vprint("vrele: bad ref count", vp
);
1380 panic("vrele: ref cnt");
1384 if ((vp
->v_flag
& VXLOCK
) || (vp
->v_flag
& VORECLAIM
)) {
1385 /* vnode is being cleaned, just return */
1387 simple_unlock(&vp
->v_interlock
);
1391 if (vn_lock(vp
, LK_EXCLUSIVE
| LK_INTERLOCK
, p
) == 0) {
1392 VOP_INACTIVE(vp
, p
);
1394 * vn_lock releases the interlock and
1395 * VOP_INCATIVE releases the vnode lock.
1396 * We could block and the vnode might get reactivated
1397 * Can not just call vfree without checking the state
1399 simple_lock(&vp
->v_interlock
);
1401 if (vp
->v_usecount
== 0)
1403 else if ((vp
->v_usecount
== 1) && UBCINFOEXISTS(vp
))
1406 simple_unlock(&vp
->v_interlock
);
1411 simple_unlock(&vp
->v_interlock
);
1412 kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp
);
1421 simple_lock(&vp
->v_interlock
);
1423 simple_unlock(&vp
->v_interlock
);
1428 * Page or buffer structure gets a reference.
1432 register struct vnode
*vp
;
1435 simple_lock(&vp
->v_interlock
);
1437 simple_unlock(&vp
->v_interlock
);
1441 * Page or buffer structure frees a reference.
1445 register struct vnode
*vp
;
1448 simple_lock(&vp
->v_interlock
);
1449 if (vp
->v_holdcnt
<= 0)
1450 panic("holdrele: holdcnt");
1452 simple_unlock(&vp
->v_interlock
);
1456 * Remove any vnodes in the vnode table belonging to mount point mp.
1458 * If MNT_NOFORCE is specified, there should not be any active ones,
1459 * return error if any are found (nb: this is a user error, not a
1460 * system error). If MNT_FORCE is specified, detach any active vnodes
1464 int busyprt
= 0; /* print out busy vnodes */
1466 struct ctldebug debug1
= { "busyprt", &busyprt
};
1471 vflush(mp
, skipvp
, flags
)
1473 struct vnode
*skipvp
;
1476 struct proc
*p
= current_proc();
1477 struct vnode
*vp
, *nvp
;
1480 simple_lock(&mntvnode_slock
);
1482 for (vp
= mp
->mnt_vnodelist
.lh_first
; vp
; vp
= nvp
) {
1483 if (vp
->v_mount
!= mp
)
1485 nvp
= vp
->v_mntvnodes
.le_next
;
1487 * Skip over a selected vnode.
1492 simple_lock(&vp
->v_interlock
);
1494 * Skip over a vnodes marked VSYSTEM or VNOFLUSH.
1496 if ((flags
& SKIPSYSTEM
) && ((vp
->v_flag
& VSYSTEM
) || (vp
->v_flag
& VNOFLUSH
))) {
1497 simple_unlock(&vp
->v_interlock
);
1501 * Skip over a vnodes marked VSWAP.
1503 if ((flags
& SKIPSWAP
) && (vp
->v_flag
& VSWAP
)) {
1504 simple_unlock(&vp
->v_interlock
);
1508 * If WRITECLOSE is set, only flush out regular file
1509 * vnodes open for writing.
1511 if ((flags
& WRITECLOSE
) &&
1512 (vp
->v_writecount
== 0 || vp
->v_type
!= VREG
)) {
1513 simple_unlock(&vp
->v_interlock
);
1517 * With v_usecount == 0, all we need to do is clear
1518 * out the vnode data structures and we are done.
1520 if (vp
->v_usecount
== 0) {
1521 simple_unlock(&mntvnode_slock
);
1523 simple_lock(&mntvnode_slock
);
1527 * If FORCECLOSE is set, forcibly close the vnode.
1528 * For block or character devices, revert to an
1529 * anonymous device. For all other files, just kill them.
1531 if (flags
& FORCECLOSE
) {
1532 simple_unlock(&mntvnode_slock
);
1533 if (vp
->v_type
!= VBLK
&& vp
->v_type
!= VCHR
) {
1537 vp
->v_op
= spec_vnodeop_p
;
1538 insmntque(vp
, (struct mount
*)0);
1540 simple_lock(&mntvnode_slock
);
1545 vprint("vflush: busy vnode", vp
);
1547 simple_unlock(&vp
->v_interlock
);
1550 simple_unlock(&mntvnode_slock
);
1551 if (busy
&& ((flags
& FORCECLOSE
)==0))
1557 * Disassociate the underlying file system from a vnode.
1558 * The vnode interlock is held on entry.
1561 vclean(vp
, flags
, p
)
1570 * if the vnode is not obtained by calling getnewvnode() we
1571 * are not responsible for the cleanup. Just return.
1573 if (!(vp
->v_flag
& VSTANDARD
)) {
1574 simple_unlock(&vp
->v_interlock
);
1579 * Check to see if the vnode is in use.
1580 * If so we have to reference it before we clean it out
1581 * so that its count cannot fall to zero and generate a
1582 * race against ourselves to recycle it.
1584 if (active
= vp
->v_usecount
) {
1586 * active vnode can not be on the free list.
1587 * we are about to take an extra reference on this vnode
1588 * do the queue management as needed
1589 * Not doing so can cause "still on list" or
1590 * "vnreclaim: v_usecount" panic if VOP_LOCK() blocks.
1592 simple_lock(&vnode_free_list_slock
);
1593 if (ISSET((vp
)->v_flag
, VUINACTIVE
))
1594 VREMINACTIVE("vclean", vp
);
1595 simple_unlock(&vnode_free_list_slock
);
1597 if (++vp
->v_usecount
<= 0)
1598 panic("vclean: v_usecount");
1602 * Prevent the vnode from being recycled or
1603 * brought into use while we clean it out.
1605 if (vp
->v_flag
& VXLOCK
)
1606 panic("vclean: deadlock");
1607 vp
->v_flag
|= VXLOCK
;
1610 * Even if the count is zero, the VOP_INACTIVE routine may still
1611 * have the object locked while it cleans it out. The VOP_LOCK
1612 * ensures that the VOP_INACTIVE routine is done with its work.
1613 * For active vnodes, it ensures that no other activity can
1614 * occur while the underlying object is being cleaned out.
1616 VOP_LOCK(vp
, LK_DRAIN
| LK_INTERLOCK
, p
);
1619 * While blocked in VOP_LOCK() someone could have dropped
1620 * reference[s] and we could land on the inactive list.
1621 * if this vnode is on the inactive list
1622 * take it off the list.
1624 simple_lock(&vnode_free_list_slock
);
1625 if (ISSET((vp
)->v_flag
, VUINACTIVE
))
1626 VREMINACTIVE("vclean", vp
);
1627 simple_unlock(&vnode_free_list_slock
);
1629 /* Clean the pages in VM. */
1630 if (active
&& (flags
& DOCLOSE
))
1631 VOP_CLOSE(vp
, IO_NDELAY
, NOCRED
, p
);
1633 /* Clean the pages in VM. */
1634 didhold
= ubc_hold(vp
);
1635 if ((active
) && (didhold
))
1636 (void)ubc_clean(vp
, 0); /* do not invalidate */
1639 * Clean out any buffers associated with the vnode.
1641 if (flags
& DOCLOSE
) {
1642 if (vp
->v_tag
== VT_NFS
)
1643 nfs_vinvalbuf(vp
, V_SAVE
, NOCRED
, p
, 0);
1645 vinvalbuf(vp
, V_SAVE
, NOCRED
, p
, 0, 0);
1649 VOP_INACTIVE(vp
, p
);
1651 VOP_UNLOCK(vp
, 0, p
);
1653 /* Destroy ubc named reference */
1656 ubc_destroy_named(vp
);
1659 * Make sure vp isn't on the inactive list.
1661 simple_lock(&vnode_free_list_slock
);
1662 if (ISSET((vp
)->v_flag
, VUINACTIVE
)) {
1663 VREMINACTIVE("vclean", vp
);
1665 simple_unlock(&vnode_free_list_slock
);
1668 * Reclaim the vnode.
1670 if (VOP_RECLAIM(vp
, p
))
1671 panic("vclean: cannot reclaim");
1673 // make sure the name & parent ptrs get cleaned out!
1674 clean_up_name_parent_ptrs(vp
);
1678 struct lock__bsd__
*tmp
= vp
->v_vnlock
;
1679 if ((tmp
->lk_flags
& LK_DRAINED
) == 0)
1680 vprint("vclean: lock not drained", vp
);
1681 vp
->v_vnlock
= NULL
;
1685 /* It's dead, Jim! */
1686 vp
->v_op
= dead_vnodeop_p
;
1689 insmntque(vp
, (struct mount
*)0);
1692 * Done with purge, notify sleepers of the grim news.
1694 vp
->v_flag
&= ~VXLOCK
;
1695 if (vp
->v_flag
& VXWANT
) {
1696 vp
->v_flag
&= ~VXWANT
;
1697 wakeup((caddr_t
)vp
);
1705 * Eliminate all activity associated with the requested vnode
1706 * and with all vnodes aliased to the requested vnode.
1710 struct vop_revoke_args
/* {
1715 struct vnode
*vp
, *vq
;
1716 struct proc
*p
= current_proc();
1719 if ((ap
->a_flags
& REVOKEALL
) == 0)
1720 panic("vop_revoke");
1724 simple_lock(&vp
->v_interlock
);
1726 if (vp
->v_flag
& VALIASED
) {
1728 * If a vgone (or vclean) is already in progress,
1729 * wait until it is done and return.
1731 if (vp
->v_flag
& VXLOCK
) {
1732 while (vp
->v_flag
& VXLOCK
) {
1733 vp
->v_flag
|= VXWANT
;
1734 simple_unlock(&vp
->v_interlock
);
1735 (void)tsleep((caddr_t
)vp
, PINOD
, "vop_revokeall", 0);
1740 * Ensure that vp will not be vgone'd while we
1741 * are eliminating its aliases.
1743 vp
->v_flag
|= VXLOCK
;
1744 simple_unlock(&vp
->v_interlock
);
1745 while (vp
->v_flag
& VALIASED
) {
1746 simple_lock(&spechash_slock
);
1747 for (vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
1748 if (vq
->v_rdev
!= vp
->v_rdev
||
1749 vq
->v_type
!= vp
->v_type
|| vp
== vq
)
1751 simple_unlock(&spechash_slock
);
1756 simple_unlock(&spechash_slock
);
1759 * Remove the lock so that vgone below will
1760 * really eliminate the vnode after which time
1761 * vgone will awaken any sleepers.
1763 simple_lock(&vp
->v_interlock
);
1764 vp
->v_flag
&= ~VXLOCK
;
1771 * Recycle an unused vnode to the front of the free list.
1772 * Release the passed interlock if the vnode will be recycled.
1775 vrecycle(vp
, inter_lkp
, p
)
1777 struct slock
*inter_lkp
;
1781 simple_lock(&vp
->v_interlock
);
1782 if (vp
->v_usecount
== 0) {
1784 simple_unlock(inter_lkp
);
1788 simple_unlock(&vp
->v_interlock
);
1793 * Eliminate all activity associated with a vnode
1794 * in preparation for reuse.
1800 struct proc
*p
= current_proc();
1802 simple_lock(&vp
->v_interlock
);
1807 * vgone, with the vp interlock held.
1818 * if the vnode is not obtained by calling getnewvnode() we
1819 * are not responsible for the cleanup. Just return.
1821 if (!(vp
->v_flag
& VSTANDARD
)) {
1822 simple_unlock(&vp
->v_interlock
);
1827 * If a vgone (or vclean) is already in progress,
1828 * wait until it is done and return.
1830 if (vp
->v_flag
& VXLOCK
) {
1831 while (vp
->v_flag
& VXLOCK
) {
1832 vp
->v_flag
|= VXWANT
;
1833 simple_unlock(&vp
->v_interlock
);
1834 (void)tsleep((caddr_t
)vp
, PINOD
, "vgone", 0);
1839 * Clean out the filesystem specific data.
1841 vclean(vp
, DOCLOSE
, p
);
1843 * Delete from old mount point vnode list, if on one.
1845 if (vp
->v_mount
!= NULL
)
1846 insmntque(vp
, (struct mount
*)0);
1848 * If special device, remove it from special device alias list
1851 if ((vp
->v_type
== VBLK
|| vp
->v_type
== VCHR
) && vp
->v_specinfo
!= 0) {
1852 simple_lock(&spechash_slock
);
1853 if (*vp
->v_hashchain
== vp
) {
1854 *vp
->v_hashchain
= vp
->v_specnext
;
1856 for (vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
1857 if (vq
->v_specnext
!= vp
)
1859 vq
->v_specnext
= vp
->v_specnext
;
1863 panic("missing bdev");
1865 if (vp
->v_flag
& VALIASED
) {
1867 for (vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
1868 if (vq
->v_rdev
!= vp
->v_rdev
||
1869 vq
->v_type
!= vp
->v_type
)
1876 panic("missing alias");
1878 vx
->v_flag
&= ~VALIASED
;
1879 vp
->v_flag
&= ~VALIASED
;
1881 simple_unlock(&spechash_slock
);
1883 struct specinfo
*tmp
= vp
->v_specinfo
;
1884 vp
->v_specinfo
= NULL
;
1885 FREE_ZONE((void *)tmp
, sizeof(struct specinfo
), M_SPECINFO
);
1889 * If it is on the freelist and not already at the head,
1890 * move it to the head of the list. The test of the back
1891 * pointer and the reference count of zero is because
1892 * it will be removed from the free list by getnewvnode,
1893 * but will not have its reference count incremented until
1894 * after calling vgone. If the reference count were
1895 * incremented first, vgone would (incorrectly) try to
1896 * close the previous instance of the underlying object.
1897 * So, the back pointer is explicitly set to `0xdeadb' in
1898 * getnewvnode after removing it from the freelist to ensure
1899 * that we do not try to move it here.
1901 if (vp
->v_usecount
== 0 && (vp
->v_flag
& VUINACTIVE
) == 0) {
1902 simple_lock(&vnode_free_list_slock
);
1903 if ((vp
->v_freelist
.tqe_prev
!= (struct vnode
**)0xdeadb) &&
1904 vnode_free_list
.tqh_first
!= vp
) {
1905 TAILQ_REMOVE(&vnode_free_list
, vp
, v_freelist
);
1906 TAILQ_INSERT_HEAD(&vnode_free_list
, vp
, v_freelist
);
1908 simple_unlock(&vnode_free_list_slock
);
1914 * Lookup a vnode by device number.
1917 vfinddev(dev
, type
, vpp
)
1925 simple_lock(&spechash_slock
);
1926 for (vp
= speclisth
[SPECHASH(dev
)]; vp
; vp
= vp
->v_specnext
) {
1927 if (dev
!= vp
->v_rdev
|| type
!= vp
->v_type
)
1933 simple_unlock(&spechash_slock
);
1938 * Calculate the total number of references to a special device.
1944 struct vnode
*vq
, *vnext
;
1948 if ((vp
->v_flag
& VALIASED
) == 0)
1949 return (vp
->v_usecount
);
1950 simple_lock(&spechash_slock
);
1951 for (count
= 0, vq
= *vp
->v_hashchain
; vq
; vq
= vnext
) {
1952 vnext
= vq
->v_specnext
;
1953 if (vq
->v_rdev
!= vp
->v_rdev
|| vq
->v_type
!= vp
->v_type
)
1956 * Alias, but not in use, so flush it out.
1958 if (vq
->v_usecount
== 0 && vq
!= vp
) {
1959 simple_unlock(&spechash_slock
);
1963 count
+= vq
->v_usecount
;
1965 simple_unlock(&spechash_slock
);
1969 int prtactive
= 0; /* 1 => print out reclaim of active vnodes */
1972 * Print out a description of a vnode.
1974 static char *typename
[] =
1975 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1980 register struct vnode
*vp
;
1985 printf("%s: ", label
);
1986 printf("type %s, usecount %d, writecount %d, refcount %d,",
1987 typename
[vp
->v_type
], vp
->v_usecount
, vp
->v_writecount
,
1990 if (vp
->v_flag
& VROOT
)
1991 strcat(buf
, "|VROOT");
1992 if (vp
->v_flag
& VTEXT
)
1993 strcat(buf
, "|VTEXT");
1994 if (vp
->v_flag
& VSYSTEM
)
1995 strcat(buf
, "|VSYSTEM");
1996 if (vp
->v_flag
& VNOFLUSH
)
1997 strcat(buf
, "|VNOFLUSH");
1998 if (vp
->v_flag
& VXLOCK
)
1999 strcat(buf
, "|VXLOCK");
2000 if (vp
->v_flag
& VXWANT
)
2001 strcat(buf
, "|VXWANT");
2002 if (vp
->v_flag
& VBWAIT
)
2003 strcat(buf
, "|VBWAIT");
2004 if (vp
->v_flag
& VALIASED
)
2005 strcat(buf
, "|VALIASED");
2007 printf(" flags (%s)", &buf
[1]);
2008 if (vp
->v_data
== NULL
) {
2018 * List all of the locked vnodes in the system.
2019 * Called when debugging the kernel.
2024 struct proc
*p
= current_proc();
2025 struct mount
*mp
, *nmp
;
2028 printf("Locked vnodes\n");
2029 simple_lock(&mountlist_slock
);
2030 for (mp
= mountlist
.cqh_first
; mp
!= (void *)&mountlist
; mp
= nmp
) {
2031 if (vfs_busy(mp
, LK_NOWAIT
, &mountlist_slock
, p
)) {
2032 nmp
= mp
->mnt_list
.cqe_next
;
2035 for (vp
= mp
->mnt_vnodelist
.lh_first
;
2037 vp
= vp
->v_mntvnodes
.le_next
) {
2038 if (VOP_ISLOCKED(vp
))
2039 vprint((char *)0, vp
);
2041 simple_lock(&mountlist_slock
);
2042 nmp
= mp
->mnt_list
.cqe_next
;
2045 simple_unlock(&mountlist_slock
);
2050 build_path(struct vnode
*vp
, char *buff
, int buflen
, int *outlen
)
2053 int i
, len
, ret
=0, counter
=0;
2055 end
= &buff
[buflen
-1];
2058 while(vp
&& VPARENT(vp
) != vp
) {
2059 // the maximum depth of a file system hierarchy is MAXPATHLEN/2
2060 // (with single-char names separated by slashes). we panic if
2061 // we've ever looped more than that.
2062 if (counter
++ > MAXPATHLEN
/2) {
2063 panic("build_path: vnode parent chain is too long! vp 0x%x\n", vp
);
2066 if (VNAME(vp
) == NULL
) {
2067 if (VPARENT(vp
) != NULL
) {
2073 // count how long the string is
2074 for(len
=0; *str
; str
++, len
++)
2077 // check that there's enough space
2078 if ((end
- buff
) < len
) {
2083 // copy it backwards
2084 for(; len
> 0; len
--) {
2088 // put in the path separator
2091 // walk up the chain.
2094 // check if we're crossing a mount point and
2095 // switch the vp if we are.
2096 if (vp
&& (vp
->v_flag
& VROOT
)) {
2097 vp
= vp
->v_mount
->mnt_vnodecovered
;
2101 // slide it down to the beginning of the buffer
2102 memmove(buff
, end
, &buff
[buflen
] - end
);
2104 *outlen
= &buff
[buflen
] - end
;
2109 __private_extern__
int
2110 vn_getpath(struct vnode
*vp
, char *pathbuf
, int *len
)
2112 return build_path(vp
, pathbuf
, *len
, len
);
2118 * Top level filesystem related information gathering.
2121 vfs_sysctl(name
, namelen
, oldp
, oldlenp
, newp
, newlen
, p
)
2130 struct vfsconf
*vfsp
;
2136 * The VFS_NUMMNTOPS shouldn't be at name[0] since
2137 * is a VFS generic variable. So now we must check
2138 * namelen so we don't end up covering any UFS
2139 * variables (sinc UFS vfc_typenum is 1).
2141 * It should have been:
2142 * name[0]: VFS_GENERIC
2143 * name[1]: VFS_NUMMNTOPS
2145 if (namelen
== 1 && name
[0] == VFS_NUMMNTOPS
) {
2146 extern unsigned int vfs_nummntops
;
2147 return (sysctl_rdint(oldp
, oldlenp
, newp
, vfs_nummntops
));
2150 /* all sysctl names at this level are at least name and field */
2152 return (EISDIR
); /* overloaded */
2153 if (name
[0] != VFS_GENERIC
) {
2154 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
2155 if (vfsp
->vfc_typenum
== name
[0])
2158 return (EOPNOTSUPP
);
2159 return ((*vfsp
->vfc_vfsops
->vfs_sysctl
)(&name
[1], namelen
- 1,
2160 oldp
, oldlenp
, newp
, newlen
, p
));
2163 case VFS_MAXTYPENUM
:
2164 return (sysctl_rdint(oldp
, oldlenp
, newp
, maxvfsconf
));
2167 return (ENOTDIR
); /* overloaded */
2168 for (vfsp
= vfsconf
; vfsp
; vfsp
= vfsp
->vfc_next
)
2169 if (vfsp
->vfc_typenum
== name
[2])
2172 return (EOPNOTSUPP
);
2173 return (sysctl_rdstruct(oldp
, oldlenp
, newp
, vfsp
,
2174 sizeof(struct vfsconf
)));
2177 * We need to get back into the general MIB, so we need to re-prepend
2178 * CTL_VFS to our name and try userland_sysctl().
2180 usernamelen
= namelen
+ 1;
2181 MALLOC(username
, int *, usernamelen
* sizeof(*username
),
2183 bcopy(name
, username
+ 1, namelen
* sizeof(*name
));
2184 username
[0] = CTL_VFS
;
2185 error
= userland_sysctl(p
, username
, usernamelen
, oldp
, oldlenp
, 1,
2186 newp
, newlen
, oldlenp
);
2187 FREE(username
, M_TEMP
);
2191 int kinfo_vdebug
= 1;
2192 #define KINFO_VNODESLOP 10
2194 * Dump vnode list (via sysctl).
2195 * Copyout address of vnode followed by vnode.
2199 sysctl_vnode(where
, sizep
, p
)
2204 struct mount
*mp
, *nmp
;
2205 struct vnode
*nvp
, *vp
;
2206 char *bp
= where
, *savebp
;
2210 #define VPTRSZ sizeof (struct vnode *)
2211 #define VNODESZ sizeof (struct vnode)
2212 if (where
== NULL
) {
2213 *sizep
= (numvnodes
+ KINFO_VNODESLOP
) * (VPTRSZ
+ VNODESZ
);
2216 ewhere
= where
+ *sizep
;
2218 simple_lock(&mountlist_slock
);
2219 for (mp
= mountlist
.cqh_first
; mp
!= (void *)&mountlist
; mp
= nmp
) {
2220 if (vfs_busy(mp
, LK_NOWAIT
, &mountlist_slock
, p
)) {
2221 nmp
= mp
->mnt_list
.cqe_next
;
2226 simple_lock(&mntvnode_slock
);
2227 for (vp
= mp
->mnt_vnodelist
.lh_first
;
2231 * Check that the vp is still associated with
2232 * this filesystem. RACE: could have been
2233 * recycled onto the same filesystem.
2235 if (vp
->v_mount
!= mp
) {
2236 simple_unlock(&mntvnode_slock
);
2238 printf("kinfo: vp changed\n");
2242 nvp
= vp
->v_mntvnodes
.le_next
;
2243 if (bp
+ VPTRSZ
+ VNODESZ
> ewhere
) {
2244 simple_unlock(&mntvnode_slock
);
2246 *sizep
= bp
- where
;
2249 simple_unlock(&mntvnode_slock
);
2250 if ((error
= copyout((caddr_t
)&vp
, bp
, VPTRSZ
)) ||
2251 (error
= copyout((caddr_t
)vp
, bp
+ VPTRSZ
, VNODESZ
))) {
2255 bp
+= VPTRSZ
+ VNODESZ
;
2256 simple_lock(&mntvnode_slock
);
2258 simple_unlock(&mntvnode_slock
);
2259 simple_lock(&mountlist_slock
);
2260 nmp
= mp
->mnt_list
.cqe_next
;
2263 simple_unlock(&mountlist_slock
);
2265 *sizep
= bp
- where
;
2270 * Check to see if a filesystem is mounted on a block device.
2279 if (vp
->v_specflags
& SI_MOUNTEDON
)
2281 if (vp
->v_flag
& VALIASED
) {
2282 simple_lock(&spechash_slock
);
2283 for (vq
= *vp
->v_hashchain
; vq
; vq
= vq
->v_specnext
) {
2284 if (vq
->v_rdev
!= vp
->v_rdev
||
2285 vq
->v_type
!= vp
->v_type
)
2287 if (vq
->v_specflags
& SI_MOUNTEDON
) {
2292 simple_unlock(&spechash_slock
);
2298 * Unmount all filesystems. The list is traversed in reverse order
2299 * of mounting to avoid dependencies.
2301 __private_extern__
void
2304 struct mount
*mp
, *nmp
;
2305 struct proc
*p
= current_proc();
2308 * Since this only runs when rebooting, it is not interlocked.
2310 for (mp
= mountlist
.cqh_last
; mp
!= (void *)&mountlist
; mp
= nmp
) {
2311 nmp
= mp
->mnt_list
.cqe_prev
;
2312 (void) dounmount(mp
, MNT_FORCE
, p
);
2317 * Build hash lists of net addresses and hang them off the mount point.
2318 * Called by vfs_export() to set up the lists of export addresses.
2321 vfs_hang_addrlist(mp
, nep
, argp
)
2323 struct netexport
*nep
;
2324 struct export_args
*argp
;
2326 register struct netcred
*np
;
2327 register struct radix_node_head
*rnh
;
2329 struct radix_node
*rn
;
2330 struct sockaddr
*saddr
, *smask
= 0;
2334 if (argp
->ex_addrlen
== 0) {
2335 if (mp
->mnt_flag
& MNT_DEFEXPORTED
)
2337 np
= &nep
->ne_defexported
;
2338 np
->netc_exflags
= argp
->ex_flags
;
2339 np
->netc_anon
= argp
->ex_anon
;
2340 np
->netc_anon
.cr_ref
= 1;
2341 mp
->mnt_flag
|= MNT_DEFEXPORTED
;
2344 i
= sizeof(struct netcred
) + argp
->ex_addrlen
+ argp
->ex_masklen
;
2345 MALLOC(np
, struct netcred
*, i
, M_NETADDR
, M_WAITOK
);
2346 bzero((caddr_t
)np
, i
);
2347 saddr
= (struct sockaddr
*)(np
+ 1);
2348 if (error
= copyin(argp
->ex_addr
, (caddr_t
)saddr
, argp
->ex_addrlen
))
2350 if (saddr
->sa_len
> argp
->ex_addrlen
)
2351 saddr
->sa_len
= argp
->ex_addrlen
;
2352 if (argp
->ex_masklen
) {
2353 smask
= (struct sockaddr
*)((caddr_t
)saddr
+ argp
->ex_addrlen
);
2354 error
= copyin(argp
->ex_addr
, (caddr_t
)smask
, argp
->ex_masklen
);
2357 if (smask
->sa_len
> argp
->ex_masklen
)
2358 smask
->sa_len
= argp
->ex_masklen
;
2360 i
= saddr
->sa_family
;
2361 if ((rnh
= nep
->ne_rtable
[i
]) == 0) {
2363 * Seems silly to initialize every AF when most are not
2364 * used, do so on demand here
2366 for (dom
= domains
; dom
; dom
= dom
->dom_next
)
2367 if (dom
->dom_family
== i
&& dom
->dom_rtattach
) {
2368 dom
->dom_rtattach((void **)&nep
->ne_rtable
[i
],
2372 if ((rnh
= nep
->ne_rtable
[i
]) == 0) {
2377 rn
= (*rnh
->rnh_addaddr
)((caddr_t
)saddr
, (caddr_t
)smask
, rnh
,
2381 * One of the reasons that rnh_addaddr may fail is that
2382 * the entry already exists. To check for this case, we
2383 * look up the entry to see if it is there. If so, we
2384 * do not need to make a new entry but do return success.
2386 _FREE(np
, M_NETADDR
);
2387 rn
= (*rnh
->rnh_matchaddr
)((caddr_t
)saddr
, rnh
);
2388 if (rn
!= 0 && (rn
->rn_flags
& RNF_ROOT
) == 0 &&
2389 ((struct netcred
*)rn
)->netc_exflags
== argp
->ex_flags
&&
2390 !bcmp((caddr_t
)&((struct netcred
*)rn
)->netc_anon
,
2391 (caddr_t
)&argp
->ex_anon
, sizeof(struct ucred
)))
2395 np
->netc_exflags
= argp
->ex_flags
;
2396 np
->netc_anon
= argp
->ex_anon
;
2397 np
->netc_anon
.cr_ref
= 1;
2400 _FREE(np
, M_NETADDR
);
2406 vfs_free_netcred(rn
, w
)
2407 struct radix_node
*rn
;
2410 register struct radix_node_head
*rnh
= (struct radix_node_head
*)w
;
2412 (*rnh
->rnh_deladdr
)(rn
->rn_key
, rn
->rn_mask
, rnh
);
2413 _FREE((caddr_t
)rn
, M_NETADDR
);
2418 * Free the net address hash lists that are hanging off the mount points.
2421 vfs_free_addrlist(nep
)
2422 struct netexport
*nep
;
2425 register struct radix_node_head
*rnh
;
2427 for (i
= 0; i
<= AF_MAX
; i
++)
2428 if (rnh
= nep
->ne_rtable
[i
]) {
2429 (*rnh
->rnh_walktree
)(rnh
, vfs_free_netcred
,
2431 _FREE((caddr_t
)rnh
, M_RTABLE
);
2432 nep
->ne_rtable
[i
] = 0;
2437 vfs_export(mp
, nep
, argp
)
2439 struct netexport
*nep
;
2440 struct export_args
*argp
;
2444 if (argp
->ex_flags
& MNT_DELEXPORT
) {
2445 vfs_free_addrlist(nep
);
2446 mp
->mnt_flag
&= ~(MNT_EXPORTED
| MNT_DEFEXPORTED
);
2448 if (argp
->ex_flags
& MNT_EXPORTED
) {
2449 if (error
= vfs_hang_addrlist(mp
, nep
, argp
))
2451 mp
->mnt_flag
|= MNT_EXPORTED
;
2457 vfs_export_lookup(mp
, nep
, nam
)
2458 register struct mount
*mp
;
2459 struct netexport
*nep
;
2462 register struct netcred
*np
;
2463 register struct radix_node_head
*rnh
;
2464 struct sockaddr
*saddr
;
2467 if (mp
->mnt_flag
& MNT_EXPORTED
) {
2469 * Lookup in the export list first.
2472 saddr
= mtod(nam
, struct sockaddr
*);
2473 rnh
= nep
->ne_rtable
[saddr
->sa_family
];
2475 np
= (struct netcred
*)
2476 (*rnh
->rnh_matchaddr
)((caddr_t
)saddr
,
2478 if (np
&& np
->netc_rnodes
->rn_flags
& RNF_ROOT
)
2483 * If no address match, use the default if it exists.
2485 if (np
== NULL
&& mp
->mnt_flag
& MNT_DEFEXPORTED
)
2486 np
= &nep
->ne_defexported
;
2492 * try to reclaim vnodes from the memory
2496 vm_object_cache_reclaim(int count
)
2499 void vnode_pager_release_from_cache(int *);
2501 /* attempt to reclaim vnodes from VM object cache */
2503 vnode_pager_release_from_cache(&cnt
);
/*
 * Release memory object reference held by inactive vnodes
 * and then try to reclaim some vnodes from the memory
 * object cache
 */
static int
vnreclaim(int count)
{
	int i, loopcnt;
	struct proc *p = current_proc();
	struct vnode *vp;
	int err;

	i = 0;
	loopcnt = 0;

	/* Try to release "count" vnodes from the inactive list */
restart:
	if (++loopcnt > inactivevnodes) {
		/*
		 * I did my best trying to reclaim the vnodes.
		 * Do not try any more as that would only lead to
		 * long latencies. Also in the worst case
		 * this can get totally CPU bound.
		 * Just fall through and attempt a reclaim of VM
		 * object cache
		 */
		goto out;
	}

	simple_lock(&vnode_free_list_slock);
	for (vp = TAILQ_FIRST(&vnode_inactive_list);
	    (vp != NULLVP) && (i < count);
	    vp = TAILQ_NEXT(vp, v_freelist)) {

		if (!simple_lock_try(&vp->v_interlock))
			continue;

		if (vp->v_usecount != 1)
			panic("vnreclaim: v_usecount");

		if (!UBCINFOEXISTS(vp)) {
			if (vp->v_type == VBAD) {
				VREMINACTIVE("vnreclaim", vp);
				simple_unlock(&vp->v_interlock);
				continue;
			} else
				panic("non UBC vnode on inactive list");
			/* Should not reach here */
		}

		/* If vnode is already being reclaimed, wait */
		if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			simple_unlock(&vnode_free_list_slock);
			(void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
			goto restart;
		}

		/*
		 * if the vnode is being initialized,
		 * skip over it
		 */
		if (ISSET(vp->v_flag, VUINIT)) {
			SET(vp->v_flag, VUWANT);
			simple_unlock(&vp->v_interlock);
			continue;
		}

		VREMINACTIVE("vnreclaim", vp);
		simple_unlock(&vnode_free_list_slock);

		if (ubc_issetflags(vp, UI_WASMAPPED)) {
			/*
			 * We should not reclaim as it is likely
			 * to be in use. Let it die a natural death.
			 * Release the UBC reference if one exists
			 * and put it back at the tail.
			 */
			simple_unlock(&vp->v_interlock);
			if (ubc_release_named(vp)) {
				if (UBCINFOEXISTS(vp)) {
					simple_lock(&vp->v_interlock);
					if (vp->v_usecount == 1 && !VONLIST(vp))
						vinactive(vp);
					simple_unlock(&vp->v_interlock);
				}
			} else {
				simple_lock(&vp->v_interlock);
				vinactive(vp);
				simple_unlock(&vp->v_interlock);
			}
		} else {
			int didhold;

			VORECLAIM_ENABLE(vp);

			/*
			 * scrub the dirty pages and invalidate the buffers
			 */
			err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
			if (err) {
				/* cannot reclaim */
				simple_lock(&vp->v_interlock);
				vinactive(vp);
				VORECLAIM_DISABLE(vp);
				i++;
				simple_unlock(&vp->v_interlock);
				goto restart;
			}

			/* keep the vnode alive so we can kill it */
			simple_lock(&vp->v_interlock);
			if (vp->v_usecount != 1)
				panic("VOCR: usecount race");
			vp->v_usecount++;
			simple_unlock(&vp->v_interlock);

			/* clean up the state in VM without invalidating */
			didhold = ubc_hold(vp);
			if (didhold)
				(void)ubc_clean(vp, 0);

			/* flush and invalidate buffers associated with the vnode */
			if (vp->v_tag == VT_NFS)
				nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
			else
				vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

			/*
			 * Note: for the v_usecount == 2 case, VOP_INACTIVE
			 * has not yet been called.  Call it now while vp is
			 * still locked, it will also release the lock.
			 */
			if (vp->v_usecount == 2)
				VOP_INACTIVE(vp, p);
			else
				VOP_UNLOCK(vp, 0, p);

			if (didhold)
				ubc_rele(vp);

			/*
			 * destroy the ubc named reference.
			 * If we can't because it is held for I/Os
			 * in progress, just put it back on the inactive
			 * list and move on.  Otherwise, the paging reference
			 * is toast (and so is this vnode?).
			 */
			if (ubc_destroy_named(vp)) {
				i++;
			}
			simple_lock(&vp->v_interlock);
			VORECLAIM_DISABLE(vp);
			simple_unlock(&vp->v_interlock);
			vrele(vp);	/* release extra use we added here */
		}
		/* inactive list lock was released, must restart */
		goto restart;
	}
	simple_unlock(&vnode_free_list_slock);

	vnode_reclaim_tried += i;
out:
	i = vm_object_cache_reclaim(count);
	vnode_objects_reclaimed += i;

	return (i);
}
/*
 * This routine is called from vnode_pager_no_senders()
 * which in turn can be called with vnode locked by vnode_uncache()
 * But it could also get called as a result of vm_object_cache_trim().
 * In that case lock state is unknown.
 * AGE the vnode so that it gets recycled quickly.
 * Check lock status to decide whether to call vput() or vrele().
 */
__private_extern__ void
vnode_pager_vrele(struct vnode *vp)
{

	boolean_t	funnel_state;
	int isvnreclaim = 1;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	/* Mark the vnode to be recycled */
	vagevp(vp);

	simple_lock(&vp->v_interlock);
	/*
	 * If a vgone (or vclean) is already in progress,
	 * Do not bother with the ubc_info cleanup.
	 * Let the vclean deal with it.
	 */
	if (vp->v_flag & VXLOCK) {
		CLR(vp->v_flag, VTERMINATE);
		if (ISSET(vp->v_flag, VTERMWANT)) {
			CLR(vp->v_flag, VTERMWANT);
			wakeup((caddr_t)&vp->v_ubcinfo);
		}
		simple_unlock(&vp->v_interlock);
		vrele(vp);
		(void) thread_funnel_set(kernel_flock, funnel_state);
		return;
	}

	/* It's dead, Jim! */
	if (!ISSET(vp->v_flag, VORECLAIM)) {
		/*
		 * called as a result of eviction of the memory
		 * object from the memory object cache
		 */
		isvnreclaim = 0;

		/* So serialize vnode operations */
		VORECLAIM_ENABLE(vp);
	}
	if (!ISSET(vp->v_flag, VTERMINATE))
		SET(vp->v_flag, VTERMINATE);

	if (UBCINFOEXISTS(vp)) {
		struct ubc_info *uip = vp->v_ubcinfo;

		if (ubc_issetflags(vp, UI_WASMAPPED))
			SET(vp->v_flag, VWASMAPPED);

		vp->v_ubcinfo = UBC_NOINFO;	/* catch bad accesses */
		simple_unlock(&vp->v_interlock);
		ubc_info_deallocate(uip);
	} else {
		if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
		    && ((vp)->v_ubcinfo != UBC_NOINFO)) {
			struct ubc_info *uip = vp->v_ubcinfo;

			vp->v_ubcinfo = UBC_NOINFO;	/* catch bad accesses */
			simple_unlock(&vp->v_interlock);
			ubc_info_deallocate(uip);
		} else {
			simple_unlock(&vp->v_interlock);
		}
	}

	CLR(vp->v_flag, VTERMINATE);

	if (vp->v_type != VBAD) {
		vgone(vp);	/* revoke the vnode */
		vrele(vp);	/* and drop the reference */
	} else
		vrele(vp);

	if (ISSET(vp->v_flag, VTERMWANT)) {
		CLR(vp->v_flag, VTERMWANT);
		wakeup((caddr_t)&vp->v_ubcinfo);
	}
	if (!isvnreclaim)
		VORECLAIM_DISABLE(vp);
	(void) thread_funnel_set(kernel_flock, funnel_state);
	return;
}
#if DIAGNOSTIC
int walk_vnodes_debug = 0;

void
walk_allvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;
	int cnt = 0;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			if (vp->v_usecount < 0) {
				if (walk_vnodes_debug) {
					printf("vp is %x\n", vp);
				}
			}
		}
		nmp = mp->mnt_list.cqe_next;
	}
	for (cnt = 0, vp = vnode_free_list.tqh_first;
	    vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
		if ((vp->v_usecount < 0) && walk_vnodes_debug) {
			if (walk_vnodes_debug) {
				printf("vp is %x\n", vp);
			}
		}
	}
	printf("%d - free\n", cnt);

	for (cnt = 0, vp = vnode_inactive_list.tqh_first;
	    vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
		if ((vp->v_usecount < 0) && walk_vnodes_debug) {
			if (walk_vnodes_debug) {
				printf("vp is %x\n", vp);
			}
		}
	}
	printf("%d - inactive\n", cnt);
}
#endif /* DIAGNOSTIC */
struct x_constraints {
	u_int32_t x_maxreadcnt;
	u_int32_t x_maxsegreadsize;
	u_int32_t x_maxsegwritesize;
};

void
vfs_io_attributes(vp, flags, iosize, vectors)
	struct vnode	*vp;
	int	flags;	/* B_READ or B_WRITE */
	int	*iosize;
	int	*vectors;
{
	struct mount *mp;

	/* start with "reasonable" defaults */
	*iosize = MAXPHYS;
	*vectors = 32;

	mp = vp->v_mount;
	if (mp != NULL) {
		switch (flags) {
		case B_READ:
			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
				*iosize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt;
			else
				*iosize = mp->mnt_maxreadcnt;
			*vectors = mp->mnt_segreadcnt;
			break;
		case B_WRITE:
			*iosize = mp->mnt_maxwritecnt;
			*vectors = mp->mnt_segwritecnt;
			break;
		default:
			break;
		}
		if (*iosize == 0)
			*iosize = MAXPHYS;
		if (*vectors == 0)
			*vectors = 32;
	}
	return;
}
void
vfs_io_maxsegsize(vp, flags, maxsegsize)
	struct vnode	*vp;
	int	flags;	/* B_READ or B_WRITE */
	int	*maxsegsize;
{
	struct mount *mp;

	/* start with "reasonable" default */
	*maxsegsize = MAXPHYS;

	mp = vp->v_mount;
	if (mp != NULL) {
		switch (flags) {
		case B_READ:
			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
				*maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize;
			else
				/*
				 * if the extended info doesn't exist
				 * then use the maxread I/O size as the
				 * max segment size... this is the previous behavior
				 */
				*maxsegsize = mp->mnt_maxreadcnt;
			break;
		case B_WRITE:
			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
				*maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize;
			else
				/*
				 * if the extended info doesn't exist
				 * then use the maxwrite I/O size as the
				 * max segment size... this is the previous behavior
				 */
				*maxsegsize = mp->mnt_maxwritecnt;
			break;
		default:
			break;
		}
		if (*maxsegsize == 0)
			*maxsegsize = MAXPHYS;
	}
}
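
/*
 * Illustrative sketch (not part of the original source): how an I/O path such
 * as the cluster layer would consult these routines to clamp a transfer.  The
 * function name "example_clamp_io" is hypothetical; vfs_io_attributes() and
 * vfs_io_maxsegsize() are the real interfaces.
 */
#if 0	/* example only, not compiled */
static void
example_clamp_io(struct vnode *vp, int iodir, int *bytecnt)
{
	int max_iosize, max_vectors, max_segsize;

	vfs_io_attributes(vp, iodir, &max_iosize, &max_vectors);
	vfs_io_maxsegsize(vp, iodir, &max_segsize);

	/* never issue a single request larger than the device allows */
	if (*bytecnt > max_iosize)
		*bytecnt = max_iosize;
}
#endif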
#include <sys/disk.h>

int
vfs_init_io_attributes(devvp, mp)
	struct vnode *devvp;
	struct mount *mp;
{
	int error;
	off_t readblockcnt;
	off_t writeblockcnt;
	off_t readmaxcnt;
	off_t writemaxcnt;
	off_t readsegcnt;
	off_t writesegcnt;
	off_t readsegsize;
	off_t writesegsize;
	u_long blksize;
	u_int64_t temp;
	int isvirtual = 0;
	struct proc *p = current_proc();
	struct ucred *cred = p->p_ucred;
	/*
	 * determine if this mount point exists on the same device as the root
	 * partition... if so, then it comes under the hard throttle control
	 */
	int thisunit = -1;
	static int rootunit = -1;
	extern struct vnode *rootvp;

	if (rootunit == -1) {
		if (VOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, cred, p))
			rootunit = -1;
		else if (rootvp == devvp)
			mp->mnt_kern_flag |= MNTK_ROOTDEV;
	}
	if (devvp != rootvp && rootunit != -1) {
		if (VOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, cred, p) == 0) {
			if (thisunit == rootunit)
				mp->mnt_kern_flag |= MNTK_ROOTDEV;
		}
	}
	if (VOP_IOCTL(devvp, DKIOCGETISVIRTUAL, (caddr_t)&isvirtual, 0, cred, p) == 0) {
		if (isvirtual)
			mp->mnt_kern_flag |= MNTK_VIRTUALDEV;
	}

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
	    (caddr_t)&readblockcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
	    (caddr_t)&writeblockcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD,
	    (caddr_t)&readmaxcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE,
	    (caddr_t)&writemaxcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
	    (caddr_t)&readsegcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
	    (caddr_t)&writesegcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD,
	    (caddr_t)&readsegsize, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE,
	    (caddr_t)&writesegsize, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
	    (caddr_t)&blksize, 0, cred, p)))
		return (error);

	if ( !(mp->mnt_kern_flag & MNTK_IO_XINFO)) {
		MALLOC(mp->mnt_xinfo_ptr, void *, sizeof(struct x_constraints), M_TEMP, M_WAITOK);
		mp->mnt_kern_flag |= MNTK_IO_XINFO;
	}

	if (readmaxcnt)
		temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt;
	else {
		if (readblockcnt) {
			temp = readblockcnt * blksize;
			temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
		} else
			temp = MAXPHYS;
	}
	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt = (u_int32_t)temp;

	if (writemaxcnt)
		temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt;
	else {
		if (writeblockcnt) {
			temp = writeblockcnt * blksize;
			temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
		} else
			temp = MAXPHYS;
	}
	mp->mnt_maxwritecnt = (u_int32_t)temp;

	if (readsegcnt) {
		temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
		mp->mnt_segreadcnt = (u_int16_t)temp;
	}
	if (writesegcnt) {
		temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
		mp->mnt_segwritecnt = (u_int16_t)temp;
	}
	if (readsegsize)
		temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize;
	else
		temp = mp->mnt_maxreadcnt;
	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize = (u_int32_t)temp;

	if (writesegsize)
		temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize;
	else
		temp = mp->mnt_maxwritecnt;
	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize = (u_int32_t)temp;

	return (error);
}
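
/*
 * Illustrative sketch (not part of the original source): a filesystem's mount
 * routine would typically call vfs_init_io_attributes() once it has the open
 * device vnode, so the mount inherits the device's transfer constraints.
 * "example_mountfs" is a hypothetical caller.
 */
#if 0	/* example only, not compiled */
static int
example_mountfs(struct vnode *devvp, struct mount *mp)
{
	int error;

	/* probe the device and record its I/O constraints on the mount */
	if ((error = vfs_init_io_attributes(devvp, mp)))
		return (error);
	/* ... filesystem-specific mount work ... */
	return (0);
}
#endif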
static struct klist fs_klist;

void
vfs_event_init(void)
{

	klist_init(&fs_klist);
}

void
vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data)
{

	KNOTE(&fs_klist, event);
}
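
/*
 * Illustrative sketch (not part of the original source): a filesystem that
 * detects a state change would post it to any kqueue listeners like this.
 * The VQ_NOTRESP value is assumed from the vfsquery flags; only
 * vfs_event_signal() itself is defined above.
 */
#if 0	/* example only, not compiled */
static void
example_server_not_responding(struct mount *mp)
{
	/* wake up kqueue watchers of filesystem events */
	vfs_event_signal(&mp->mnt_stat.f_fsid, VQ_NOTRESP, 0);
}
#endif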
/*
 * return the number of mounted filesystems.
 */
static int
sysctl_vfs_getvfscnt(void)
{
	struct mount *mp;
	int ret = 0;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
		ret++;
	simple_unlock(&mountlist_slock);
	return (ret);
}
/*
 * fill in the array of fsid_t's up to a max of 'count', the actual
 * number filled in will be set in '*actual'.  If there are more fsid_t's
 * than room in fsidlst then ENOMEM will be returned and '*actual' will
 * have the actual count.
 * Callers depend on '*actual' being filled out even in the error case.
 */
static int
sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual)
{
	struct mount *mp;

	*actual = 0;
	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		(*actual)++;
		if (*actual <= count)
			fsidlst[(*actual) - 1] = mp->mnt_stat.f_fsid;
	}
	simple_unlock(&mountlist_slock);
	return (*actual <= count ? 0 : ENOMEM);
}
static int
sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS
{
	int actual, error;
	size_t space;
	fsid_t *fsidlst;

	/* This is a readonly node. */
	if (req->newptr != NULL)
		return (EPERM);

	/* they are querying us so just return the space required. */
	if (req->oldptr == NULL) {
		req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
		return (0);
	}
again:
	/*
	 * Retrieve an accurate count of the amount of space required to copy
	 * out all the fsids in the system.
	 */
	space = req->oldlen;
	req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t);

	/* they didn't give us enough space. */
	if (space < req->oldlen)
		return (ENOMEM);

	MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK);
	error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t),
	    &actual);
	/*
	 * If we get back ENOMEM, then another mount has been added while we
	 * slept in malloc above.  If this is the case then try again.
	 */
	if (error == ENOMEM) {
		FREE(fsidlst, M_TEMP);
		req->oldlen = space;
		goto again;
	}
	if (error == 0)
		error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t));
	FREE(fsidlst, M_TEMP);
	return (error);
}
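
/*
 * Illustrative sketch (not part of the original source): reading this node
 * from user space.  Assumes the node is reachable via
 * sysctlbyname("vfs.generic.vfsidlist"); error handling is abbreviated.
 */
#if 0	/* example only, user-space code */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/mount.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	size_t len;
	fsid_t *fsids;
	int i, n;

	/* first call sizes the buffer, second call fills it */
	if (sysctlbyname("vfs.generic.vfsidlist", NULL, &len, NULL, 0) == -1)
		return (1);
	fsids = malloc(len);
	if (sysctlbyname("vfs.generic.vfsidlist", fsids, &len, NULL, 0) == -1)
		return (1);
	n = len / sizeof(fsid_t);
	for (i = 0; i < n; i++)
		printf("fsid: %d %d\n", fsids[i].val[0], fsids[i].val[1]);
	free(fsids);
	return (0);
}
#endif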
/*
 * Do a sysctl by fsid.
 */
static int
sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS
{
	struct vfsidctl vc;
	struct mount *mp;
	struct statfs *sp;
	struct proc *p;
	int *name;
	int error, flags, namelen;

	name = arg1;
	namelen = arg2;
	p = req->p;

	error = SYSCTL_IN(req, &vc, sizeof(vc));
	if (error)
		return (error);
	if (vc.vc_vers != VFS_CTL_VERS1)
		return (EINVAL);
	mp = vfs_getvfs(&vc.vc_fsid);
	if (mp == NULL)
		return (ENOENT);
	/* reset so that the fs specific code can fetch it. */
	req->newidx = 0;
	/*
	 * Note if this is a VFS_CTL then we pass the actual sysctl req
	 * in for "oldp" so that the lower layer can DTRT and use the
	 * SYSCTL_IN/OUT routines.
	 */
	if (mp->mnt_op->vfs_sysctl != NULL) {
		error = mp->mnt_op->vfs_sysctl(name, namelen,
		    req, NULL, NULL, 0, req->p);
		if (error != EOPNOTSUPP)
			return (error);
	}
	switch (name[0]) {
	case VFS_CTL_UMOUNT:
		VCTLTOREQ(&vc, req);
		error = SYSCTL_IN(req, &flags, sizeof(flags));
		if (error)
			break;
		error = safedounmount(mp, flags, p);
		break;
	case VFS_CTL_STATFS:
		VCTLTOREQ(&vc, req);
		error = SYSCTL_IN(req, &flags, sizeof(flags));
		if (error)
			break;
		sp = &mp->mnt_stat;
		if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, p)))
			return (error);
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		error = SYSCTL_OUT(req, sp, sizeof(*sp));
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (error);
}
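
/*
 * Illustrative sketch (not part of the original source): a filesystem can
 * intercept these operations by supplying vfs_sysctl in its vfsops; the
 * generic switch above only runs when that hook returns EOPNOTSUPP.  The
 * function name "example_sysctl" is hypothetical; the signature follows the
 * vfs_sysctl call made above.
 */
#if 0	/* example only, not compiled */
static int
example_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct proc *p)
{
	switch (name[0]) {
	case VFS_CTL_UMOUNT:
		/* veto or prepare for an unmount request here */
		return (EOPNOTSUPP);	/* fall back to the generic code */
	default:
		return (EOPNOTSUPP);
	}
}
#endif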
static int	filt_fsattach(struct knote *kn);
static void	filt_fsdetach(struct knote *kn);
static int	filt_fsevent(struct knote *kn, long hint);

struct filterops fs_filtops =
	{ 0, filt_fsattach, filt_fsdetach, filt_fsevent };
static int
filt_fsattach(struct knote *kn)
{

	kn->kn_flags |= EV_CLEAR;
	KNOTE_ATTACH(&fs_klist, kn);
	return (0);
}

static void
filt_fsdetach(struct knote *kn)
{

	KNOTE_DETACH(&fs_klist, kn);
}

static int
filt_fsevent(struct knote *kn, long hint)
{

	kn->kn_fflags |= hint;
	return (kn->kn_fflags != 0);
}
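
/*
 * Illustrative sketch (not part of the original source): watching these
 * filesystem events from user space with kqueue.  Assumes the filter is
 * exported as EVFILT_FS and that "handle_fs_event" is a hypothetical
 * handler; error handling is abbreviated.
 */
#if 0	/* example only, user-space code */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

static void
example_watch_fs_events(void)
{
	int kq;
	struct kevent ev;

	kq = kqueue();
	EV_SET(&ev, 0, EVFILT_FS, EV_ADD | EV_CLEAR, 0, 0, NULL);
	kevent(kq, &ev, 1, NULL, 0, NULL);	/* register interest */

	/* each returned event carries the accumulated hint bits in fflags */
	while (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
		handle_fs_event(ev.fflags);
}
#endif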
static int
sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS
{
	int out, error;
	pid_t pid;
	struct proc *p;

	/* We need a pid. */
	if (req->newptr == NULL)
		return (EINVAL);

	error = SYSCTL_IN(req, &pid, sizeof(pid));
	if (error)
		return (error);

	p = pfind(pid < 0 ? -pid : pid);
	if (p == NULL)
		return (ESRCH);

	/*
	 * Fetching the value is ok, but we only fetch if the old
	 * pointer is given.
	 */
	if (req->oldptr != NULL) {
		out = !((p->p_flag & P_NOREMOTEHANG) == 0);
		error = SYSCTL_OUT(req, &out, sizeof(out));
		return (error);
	}

	/* cansignal offers us enough security. */
	if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0)
		return (EPERM);

	if (pid < 0)
		p->p_flag &= ~P_NOREMOTEHANG;
	else
		p->p_flag |= P_NOREMOTEHANG;

	return (0);
}
/* the vfs.generic. branch. */
SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge");
/* retrieve a list of mounted filesystem fsid_t */
SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD,
    0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids");
/* perform operations on filesystem via fsid_t */
SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW,
    sysctl_vfs_ctlbyfsid, "ctlbyfsid");
SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW,
    0, 0, sysctl_vfs_noremotehang, "I", "noremotehang");