/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 */
/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/filedesc.h>
#include <sys/event.h>

#include <machine/spl.h>

#include <kern/assert.h>

#include <miscfs/specfs/specdev.h>

#include <mach/mach_types.h>
#include <mach/memory_object_types.h>
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
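
/*
 * Illustrative sketch (not part of the original file): the two tables above
 * translate between the S_IFMT bits of a file mode and the vnode type enum.
 * On BSD-derived systems the IFTOVT()/VTTOIF() macros in <sys/vnode.h>
 * typically index these tables; the guarded example below shows the idea
 * with hypothetical local helpers.
 */
#if 0	/* example only */
static enum vtype
example_mode_to_vtype(mode_t mode)
{
	/* the upper four bits of the S_IFMT field select the vnode type */
	return (iftovt_tab[(mode & S_IFMT) >> 12]);
}

static int
example_vtype_to_mode(enum vtype type)
{
	/* inverse mapping: vnode type back to S_IF* mode bits */
	return (vttoif_tab[(int)type]);
}
#endif	/* example only */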
static void vfree(struct vnode *vp);
static void vinactive(struct vnode *vp);
static int vnreclaim(int count);
extern kern_return_t
	adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
TAILQ_HEAD(freelst, vnode) vnode_free_list;		/* vnode free list */
TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list;	/* vnode inactive list */
struct mntlist mountlist;				/* mounted filesystem list */
#if DIAGNOSTIC
#define VLISTCHECK(fun, vp, list)	\
	if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
		panic("%s: %s vnode not on %slist", (fun), (list), (list));

#define VINACTIVECHECK(fun, vp, expected)	\
	do {	\
		int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE);	\
		if (__is_inactive ^ expected)	\
			panic("%s: %sinactive vnode, expected %s", (fun),	\
				__is_inactive? "" : "not ",	\
				expected? "inactive": "not inactive");	\
	} while(0)
#else
#define VLISTCHECK(fun, vp, list)
#define VINACTIVECHECK(fun, vp, expected)
#endif /* DIAGNOSTIC */
#define VLISTNONE(vp)	\
	do {	\
		(vp)->v_freelist.tqe_next = (struct vnode *)0;	\
		(vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb;	\
	} while(0)

#define VONLIST(vp)	\
	((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
/* remove a vnode from free vnode list */
#define VREMFREE(fun, vp)	\
	do {	\
		VLISTCHECK((fun), (vp), "free");	\
		TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist);	\
		VLISTNONE((vp));	\
		freevnodes--;	\
	} while(0)

/* remove a vnode from inactive vnode list */
#define VREMINACTIVE(fun, vp)	\
	do {	\
		VLISTCHECK((fun), (vp), "inactive"); \
		VINACTIVECHECK((fun), (vp), VUINACTIVE); \
		TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \
		CLR((vp)->v_flag, VUINACTIVE); \
		VLISTNONE((vp));	\
		inactivevnodes--;	\
	} while(0)
#define VORECLAIM_ENABLE(vp)	\
	do {	\
		if (ISSET((vp)->v_flag, VORECLAIM))	\
			panic("vm_object_reclaim already");	\
		SET((vp)->v_flag, VORECLAIM);	\
	} while(0)

#define VORECLAIM_DISABLE(vp)	\
	do {	\
		CLR((vp)->v_flag, VORECLAIM);	\
		if (ISSET((vp)->v_flag, VXWANT)) {	\
			CLR((vp)->v_flag, VXWANT);	\
			wakeup((caddr_t)(vp));	\
		}	\
	} while(0)
/*
 * Have to declare first two locks as actual data even if !MACH_SLOCKS, since
 * pointers to them get passed around.
 */
simple_lock_data_t mountlist_slock;
simple_lock_data_t mntvnode_slock;
decl_simple_lock_data(,mntid_slock);
decl_simple_lock_data(,vnode_free_list_slock);
decl_simple_lock_data(,spechash_slock);
/*
 * vnodetarget is the amount of vnodes we expect to get back
 * from the inactive vnode list and VM object cache.
 * As vnreclaim() is a mainly CPU-bound operation, for faster
 * processors this number could be higher.
 * Having this number too high introduces longer delays in
 * the execution of getnewvnode().
 */
unsigned long vnodetarget;		/* target for vnreclaim() */
#define VNODE_FREE_TARGET	20	/* Default value for vnodetarget */

/*
 * We need quite a few vnodes on the free list to sustain the
 * rapid stat() the compilation process does, and still benefit from the name
 * cache. Having too few vnodes on the free list causes serious disk
 * thrashing as we cycle through them.
 */
#define VNODE_FREE_MIN		300	/* freelist should have at least these many */

/*
 * We need to get vnodes back from the VM object cache when a certain #
 * of vnodes are reused from the freelist. This is essential for the
 * caching to be effective in the namecache and the buffer cache [for the
 * metadata].
 */
#define	VNODE_TOOMANY_REUSED	(VNODE_FREE_MIN/4)

/*
 * If we have enough vnodes on the freelist we do not want to reclaim
 * the vnodes from the VM object cache.
 */
#define VNODE_FREE_ENOUGH	(VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
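
/*
 * Illustrative sketch (not part of the original file): how the tunables above
 * interact.  The helper name is hypothetical; the real policy lives in
 * getnewvnode() and vnreclaim() below.
 */
#if 0	/* example only */
static int
example_should_reclaim(long nvnodes, long nfree, int reused)
{
	/* low on free vnodes: try to pull some back from the inactive list */
	if (nfree < VNODE_FREE_MIN)
		return (1);
	/* heavy reuse from the freelist and still not comfortably full */
	if (nvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED &&
	    nfree < VNODE_FREE_ENOUGH)
		return (1);
	return (0);
}
#endif	/* example only */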
/*
 * Initialize the vnode management data structures.
 */
__private_extern__ void
vntblinit()
{
	extern struct lock__bsd__	exchangelock;

	simple_lock_init(&mountlist_slock);
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	TAILQ_INIT(&vnode_inactive_list);
	CIRCLEQ_INIT(&mountlist);
	lockinit(&exchangelock, PVFS, "exchange", 0, 0);

	vnodetarget = VNODE_FREE_TARGET;

	/*
	 * Scale the vm_object_cache to accommodate the vnodes
	 * we want to cache.
	 */
	(void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
}
/* Reset the VM Object Cache with the values passed in */
__private_extern__ kern_return_t
reset_vmobjectcache(unsigned int val1, unsigned int val2)
{
	vm_size_t oval = val1 - VNODE_FREE_MIN;
	vm_size_t nval;

	if (val2 < VNODE_FREE_MIN)
		nval = 0;
	else
		nval = val2 - VNODE_FREE_MIN;

	return(adjust_vm_object_cache(oval, nval));
}
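
/*
 * Illustrative sketch (not part of the original file): reset_vmobjectcache()
 * rescales the VM object cache when the vnode limit changes.  A hypothetical
 * caller that grows the global "desiredvnodes" might look like this.
 */
#if 0	/* example only */
static int
example_grow_vnode_limit(unsigned int new_limit)
{
	unsigned int old_limit = desiredvnodes;

	if (new_limit < VNODE_FREE_MIN)
		return (EINVAL);
	/* let the VM object cache track the new headroom */
	if (reset_vmobjectcache(old_limit, new_limit) != KERN_SUCCESS)
		return (EIO);
	desiredvnodes = new_limit;
	return (0);
}
#endif	/* example only */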
/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct slock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		sleep((caddr_t)mp, PVFS);
		if (interlkp)
			simple_lock(interlkp);
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}
/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{
	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = current_proc();	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));

	/* Initialize the default IO constraints */
	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;

	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}
/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	if (mountroot != NULL) {
		error = (*mountroot)();
		return (error);
	}

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}
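
/*
 * Illustrative sketch (not part of the original file): vfs_getvfs() is the
 * fsid -> mount lookup used when a caller is handed a statfs-style fsid.
 * The helper below is hypothetical and only shows the calling convention.
 */
#if 0	/* example only */
static int
example_fsid_to_flags(fsid_t *fsid, int *flagsp)
{
	struct mount *mp;

	if ((mp = vfs_getvfs(fsid)) == NULL)
		return (ESTALE);	/* no mounted filesystem has that fsid */
	*flagsp = mp->mnt_flag;
	return (0);
}
#endif	/* example only */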
/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}
/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
}
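
/*
 * Illustrative sketch (not part of the original file): callers that build a
 * struct vattr for VOP_SETATTR() first reset every field to VNOVAL with
 * vattr_null() and then fill in only the attributes they mean to change.
 * The helper below is hypothetical.
 */
#if 0	/* example only */
static int
example_set_mode(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p)
{
	struct vattr va;

	vattr_null(&va);	/* every field starts out as VNOVAL */
	va.va_mode = mode;	/* only the mode is marked "to be set" */
	return (VOP_SETATTR(vp, &va, cred, p));
}
#endif	/* example only */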
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
static void vclean __P((struct vnode *vp, int flag, struct proc *p));
extern void vgonel __P((struct vnode *vp, struct proc *p));
long numvnodes, freevnodes;
long inactivevnodes;
long vnode_reclaim_tried;
long vnode_objects_reclaimed;

extern struct vattr va_null;
/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)(void *);
	struct vnode **vpp;
{
	struct proc *p = current_proc();	/* XXX */
	struct vnode *vp;
	int cnt, didretry = 0;
	static int reused = 0;			/* track the reuse rate */
	int reclaimhits = 0;

retry:
	simple_lock(&vnode_free_list_slock);
	/*
	 * MALLOC a vnode if the number of vnodes has not reached the desired
	 * value and the number on the free list is still reasonable...
	 * reuse from the freelist even though we may evict a name cache entry
	 * to reduce the number of vnodes that accumulate.... vnodes tie up
	 * wired memory and are never garbage collected
	 */
	if (numvnodes < desiredvnodes && (freevnodes < (2 * VNODE_FREE_MIN))) {
		numvnodes++;
		simple_unlock(&vnode_free_list_slock);
		MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		VLISTNONE(vp);		/* avoid double queue removal */
		simple_lock_init(&vp->v_interlock);
		goto done;
	}

	/*
	 * Once the desired number of vnodes are allocated,
	 * we start reusing the vnodes.
	 */
	if (freevnodes < VNODE_FREE_MIN) {
		/*
		 * if we are low on vnodes on the freelist attempt to get
		 * some back from the inactive list and VM object cache
		 */
		simple_unlock(&vnode_free_list_slock);
		(void)vnreclaim(vnodetarget);
		simple_lock(&vnode_free_list_slock);
	}
	if (numvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED) {
		reused = 0;
		if (freevnodes < VNODE_FREE_ENOUGH) {
			simple_unlock(&vnode_free_list_slock);
			(void)vnreclaim(vnodetarget);
			simple_lock(&vnode_free_list_slock);
		}
	}

	for (cnt = 0, vp = vnode_free_list.tqh_first;
	    vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
		if (simple_lock_try(&vp->v_interlock)) {
			/* got the interlock */
			if (ISSET(vp->v_flag, VORECLAIM)) {
				/* skip over the vnodes that are being reclaimed */
				simple_unlock(&vp->v_interlock);
				reclaimhits++;
			} else
				break;
		}
	}

	/*
	 * Unless this is a bad time of the month, at most
	 * the first NCPUS items on the free list are
	 * locked, so this is close enough to being empty.
	 */
	if (vp == NULLVP) {
		simple_unlock(&vnode_free_list_slock);
		if (!(didretry++) && (vnreclaim(vnodetarget) > 0))
			goto retry;
		log(LOG_EMERG, "%d vnodes locked, %d desired, %d numvnodes, "
			"%d free, %d inactive, %d being reclaimed\n",
			cnt, desiredvnodes, numvnodes, freevnodes, inactivevnodes,
			reclaimhits);
		return (ENFILE);
	}

	if (vp->v_usecount)
		panic("free vnode isn't: v_type = %d, v_usecount = %d?",
			vp->v_type, vp->v_usecount);

	VREMFREE("getnewvnode", vp);
	reused++;
	simple_unlock(&vnode_free_list_slock);

	if (vp->v_type != VBAD)
		vgonel(vp, p);		/* clean and reclaim the vnode */
	else
		simple_unlock(&vp->v_interlock);
#if DIAGNOSTIC
	if (vp->v_data)
		panic("cleaned vnode isn't");
	if (vp->v_numoutput)
		panic("Clean vnode has pending I/O's");
#endif /* DIAGNOSTIC */
	if (UBCINFOEXISTS(vp))
		panic("getnewvnode: ubcinfo not cleaned");

	if (vp->v_flag & VHASDIRTY)
		cluster_release(vp);

	// make sure all these fields are cleared out as the
	// name/parent stuff uses them and assumes they're
	// cleared to null/0.
	if (vp->v_scmap != NULL) {
		panic("getnewvnode: vp @ 0x%x has non-null scmap.\n", vp);
	}
	vp->v_un.vu_name = NULL;
	vp->v_un1.v_cl.v_pad = 0;

	/* we may have blocked, re-evaluate state */
	simple_lock(&vnode_free_list_slock);
	if (VONLIST(vp)) {
		if (vp->v_usecount == 0)
			VREMFREE("getnewvnode", vp);
		else if (ISSET((vp)->v_flag, VUINACTIVE))
			VREMINACTIVE("getnewvnode", vp);
	}
	simple_unlock(&vnode_free_list_slock);

done:
	vp->v_flag = VSTANDARD;
	vp->v_op = vops;
	vp->v_tag = tag;
	insmntque(vp, mp);
	vp->v_usecount = 1;
	*vpp = vp;
	return (0);
}
/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}
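
/*
 * Illustrative sketch (not part of the original file): insmntque() is how a
 * vnode changes which per-mount vnode list it lives on.  Passing a NULL mount
 * simply removes it from its current list, which is what vclean()/vgonel()
 * do when a vnode is being disassociated from its filesystem.
 */
#if 0	/* example only */
static void
example_orphan_vnode(struct vnode *vp)
{
	/* take vp off its old mount's list without putting it on a new one */
	insmntque(vp, (struct mount *)0);
}
#endif	/* example only */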
void
vpwakeup(struct vnode *vp)
{
	if (vp) {
		if (--vp->v_numoutput < 0)
			panic("vpwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT || vp->v_flag & VTHROTTLED)
		    && vp->v_numoutput <= 0) {
			vp->v_flag &= ~(VBWAIT|VTHROTTLED);
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}
/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	CLR(bp->b_flags, B_WRITEINPROG);
	vpwakeup(bp->b_vp);
}
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int error = 0;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
			return (error);
		}
		if (vp->v_dirtyblkhd.lh_first)
			panic("vinvalbuf: dirty bufs (vp 0x%x, bp 0x%x)",
				vp, vp->v_dirtyblkhd.lh_first);
	}

	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			if (ISSET(bp->b_flags, B_BUSY)) {
				SET(bp->b_flags, B_WANTED);
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				if (error)
					return (error);
				break;
			}
			SET(bp->b_flags, B_BUSY);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if (ISSET(bp->b_flags, B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}

			if (bp->b_flags & B_LOCKED) {
				panic("vinvalbuf: bp @ 0x%x is locked!", bp);
			}
			SET(bp->b_flags, B_INVAL);
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}
/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = current_proc();	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;
	struct specinfo *specinfop;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	MALLOC_ZONE(specinfop, struct specinfo *, sizeof(struct specinfo),
			M_SPECINFO, M_WAITOK);
	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		nvp->v_specinfo = specinfop;
		specinfop = 0;	/* buffer used */
		bzero(nvp->v_specinfo, sizeof(struct specinfo));
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		/* Since buffer is used just return */
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	if (specinfop)
		FREE_ZONE((void *)specinfop, sizeof(struct specinfo), M_SPECINFO);
	return (vp);
}
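
/*
 * Illustrative sketch (not part of the original file): a filesystem that has
 * just created a device vnode hands it to checkalias() to fold it together
 * with any pre-existing vnode for the same dev_t.  The helper below is
 * hypothetical; bdevvp() above is the in-tree caller to look at.
 */
#if 0	/* example only */
static struct vnode *
example_alias_device_vnode(struct vnode *vp, dev_t dev, struct mount *mp)
{
	struct vnode *nvp;

	if ((nvp = checkalias(vp, dev, mp)) != NULL) {
		/* an aliased vnode already existed: drop ours, use the alias */
		vput(vp);
		return (nvp);
	}
	return (vp);
}
#endif	/* example only */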
/*
 * Get a reference on a particular vnode and lock it if requested.
 * If the vnode was on the inactive list, remove it from the list.
 * If the vnode was on the free list, remove it from the list and
 * move it to inactive list as needed.
 * The vnode lock bit is set if the vnode is being eliminated in
 * vgone. The process is awakened when the transition is completed,
 * and an error returned to indicate that the vnode is no longer
 * usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error = 0;
	u_long vpid;

	vpid = vp->v_id;	// save off the original v_id

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		(void)tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	/*
	 * vnode is being terminated.
	 * wait for vnode_pager_no_senders() to clear VTERMINATE
	 */
	if (ISSET(vp->v_flag, VTERMINATE)) {
		SET(vp->v_flag, VTERMWANT);
		simple_unlock(&vp->v_interlock);
		(void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vget1", 0);
		return (ENOENT);
	}

	/*
	 * if the vnode is being initialized,
	 * wait for it to finish initialization
	 */
	if (ISSET(vp->v_flag, VUINIT)) {
		SET(vp->v_flag, VUWANT);
		simple_unlock(&vp->v_interlock);
		(void) tsleep((caddr_t)vp, PINOD, "vget2", 0);
		return (ENOENT);
	}

	simple_lock(&vnode_free_list_slock);
	if (VONLIST(vp)) {
		if (vp->v_usecount == 0)
			VREMFREE("vget", vp);
		else if (ISSET((vp)->v_flag, VUINACTIVE))
			VREMINACTIVE("vget", vp);
	}
	simple_unlock(&vnode_free_list_slock);

	if (++vp->v_usecount <= 0)
		panic("vget: v_usecount");

	/*
	 * Recover named reference as needed
	 */
	if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) {
		simple_unlock(&vp->v_interlock);
		if (ubc_getobject(vp, UBC_HOLDOBJECT) == MEMORY_OBJECT_CONTROL_NULL) {
			error = ENOENT;
			goto errout;
		}
		simple_lock(&vp->v_interlock);
	}

	if (flags & LK_TYPE_MASK) {
		if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
			goto errout;
		if (vpid != vp->v_id) {	// make sure it's still the same vnode
			VOP_UNLOCK(vp, 0, p);
			error = ENOENT;
			goto errout;
		}
		return (0);
	}

	if ((flags & LK_INTERLOCK) == 0)
		simple_unlock(&vp->v_interlock);

	if (vpid != vp->v_id) {	// make sure it's still the same vnode
		error = ENOENT;
		goto errout;
	}

	return (0);

errout:
	simple_lock(&vp->v_interlock);

	/*
	 * we may have blocked. Re-evaluate the state
	 */
	simple_lock(&vnode_free_list_slock);
	if (VONLIST(vp)) {
		if (vp->v_usecount == 0)
			VREMFREE("vget", vp);
		else if (ISSET((vp)->v_flag, VUINACTIVE))
			VREMINACTIVE("vget", vp);
	}
	simple_unlock(&vnode_free_list_slock);

	/*
	 * If the vnode was not active in the first place
	 * must not call vrele() as VOP_INACTIVE() is not
	 * required.
	 * So inlined part of vrele() here.
	 */
	if (--vp->v_usecount == 1) {
		if (UBCINFOEXISTS(vp)) {
			vinactive(vp);
			simple_unlock(&vp->v_interlock);
			return (error);
		}
	}
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return (error);
	}
	if (vp->v_usecount < 0)
		panic("vget: negative usecount (%d)", vp->v_usecount);
	vfree(vp);
	simple_unlock(&vp->v_interlock);
	return (error);
}
/*
 * Get a pager reference on the particular vnode.
 *
 * This is called from ubc_info_init() and it is assumed that
 * the vnode is not on the free list.
 * It is also assumed that the vnode is neither being recycled
 * by vgonel nor being terminated by vnode_pager_vrele().
 *
 * The vnode interlock is NOT held by the caller.
 */
__private_extern__ int
vnode_pager_vget(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);

	UBCINFOCHECK("vnode_pager_vget", vp);

	if (ISSET(vp->v_flag, (VXLOCK|VORECLAIM|VTERMINATE)))
		panic("%s: dying vnode", "vnode_pager_vget");

	simple_lock(&vnode_free_list_slock);
	/* The vnode should not be on free list */
	if (VONLIST(vp)) {
		if (vp->v_usecount == 0)
			panic("%s: still on list", "vnode_pager_vget");
		else if (ISSET((vp)->v_flag, VUINACTIVE))
			VREMINACTIVE("vnode_pager_vget", vp);
	}

	/* The vnode should not be on the inactive list here */
	simple_unlock(&vnode_free_list_slock);

	/* After all those checks, now do the real work :-) */
	if (++vp->v_usecount <= 0)
		panic("vnode_pager_vget: v_usecount");
	simple_unlock(&vp->v_interlock);

	return (0);
}
/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress. So, an active shared
 * count is maintained in an auxiliary vnode lock structure.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
#ifdef notyet
	/*
	 * This code cannot be used until all the non-locking filesystems
	 * (notably NFS) are converted to properly lock and release nodes.
	 * Also, certain vnode operations change the locking state within
	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
	 * and symlink). Ideally these operations should not change the
	 * lock state, but should be changed to let the caller of the
	 * function unlock them. Otherwise all intermediate vnode layers
	 * (such as union, umapfs, etc) must catch these functions to do
	 * the necessary locking at their layer. Note that the inactive
	 * and lookup operations also change their lock state, but this
	 * cannot be avoided, so these two operations will always need
	 * to be handled in intermediate layers.
	 */
	struct vnode *vp = ap->a_vp;
	int vnflags, flags = ap->a_flags;

	if (vp->v_vnlock == NULL) {
		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
			return (0);
		MALLOC(vp->v_vnlock, struct lock__bsd__ *,
				sizeof(struct lock__bsd__), M_TEMP, M_WAITOK);
		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	}
	switch (flags & LK_TYPE_MASK) {
	case LK_DRAIN:
		vnflags = LK_DRAIN;
		break;
	case LK_EXCLUSIVE:
	case LK_SHARED:
		vnflags = LK_SHARED;
		break;
	case LK_UPGRADE:
	case LK_EXCLUPGRADE:
	case LK_DOWNGRADE:
		return (0);
	case LK_RELEASE:
	default:
		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
	}
	if (flags & LK_INTERLOCK)
		vnflags |= LK_INTERLOCK;
	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
#else /* notyet */
	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&ap->a_vp->v_interlock);
	return (0);
#endif /* notyet */
}

/*
 * Decrement the active use count.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
}

/*
 * Return whether or not the node is in use.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockstatus(vp->v_vnlock));
}
/*
 * Vnode reference, just increment the count.
 */
void
vref(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");

	/* If on the inactive list, remove it from there */
	simple_lock(&vnode_free_list_slock);
	if (ISSET((vp)->v_flag, VUINACTIVE))
		VREMINACTIVE("vref", vp);
	simple_unlock(&vnode_free_list_slock);

	if (++vp->v_usecount <= 0)
		panic("vref v_usecount");
	simple_unlock(&vp->v_interlock);
}
static void
clean_up_name_parent_ptrs(struct vnode *vp)
{
	if (VNAME(vp) || VPARENT(vp)) {
		// do it this way so we don't block before clearing
	}
}
/*
 * put the vnode on appropriate free list.
 * called with v_interlock held.
 */
static void
vfree(vp)
	struct vnode *vp;
{
	funnel_t *curflock;
	extern int disable_funnel;

	if ((curflock = thread_funnel_get()) != kernel_flock &&
	    !(disable_funnel && curflock != THR_FUNNEL_NULL))
		panic("Entering vfree() without kernel funnel");

	/*
	 * if the vnode is not obtained by calling getnewvnode() we
	 * are not responsible for the cleanup. Just return.
	 */
	if (!(vp->v_flag & VSTANDARD)) {
		return;
	}

	if (vp->v_usecount != 0)
		panic("vfree: v_usecount");

	/* insert at tail of LRU list or at head if VAGE is set */
	simple_lock(&vnode_free_list_slock);

	// make sure the name & parent pointers get cleared out
	// clean_up_name_parent_ptrs(vp);

	if (VONLIST(vp))
		panic("%s: vnode still on list", "vfree");

	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		vp->v_flag &= ~VAGE;
	} else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	freevnodes++;
	simple_unlock(&vnode_free_list_slock);
}
/*
 * put the vnode on the inactive list.
 * called with v_interlock held
 */
static void
vinactive(vp)
	struct vnode *vp;
{
	funnel_t *curflock;
	extern int disable_funnel;

	if ((curflock = thread_funnel_get()) != kernel_flock &&
	    !(disable_funnel && curflock != THR_FUNNEL_NULL))
		panic("Entering vinactive() without kernel funnel");

	if (!UBCINFOEXISTS(vp))
		panic("vinactive: not a UBC vnode");

	if (vp->v_usecount != 1)
		panic("vinactive: v_usecount");

	simple_lock(&vnode_free_list_slock);

	if (VONLIST(vp))
		panic("%s: vnode still on list", "vinactive");
	VINACTIVECHECK("vinactive", vp, 0);

	TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
	SET(vp->v_flag, VUINACTIVE);
	CLR(vp->v_flag, (VNOCACHE_DATA | VRAOFF));

	inactivevnodes++;
	simple_unlock(&vnode_free_list_slock);
}
/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = current_proc();	/* XXX */

	simple_lock(&vp->v_interlock);
	if (--vp->v_usecount == 1) {
		if (UBCINFOEXISTS(vp)) {
			vinactive(vp);
			simple_unlock(&vp->v_interlock);
			VOP_UNLOCK(vp, 0, p);
			return;
		}
	}
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}

	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: v_usecount = %d, v_writecount = %d",
			vp->v_usecount, vp->v_writecount);
	}

	simple_lock(&vnode_free_list_slock);
	if (ISSET((vp)->v_flag, VUINACTIVE))
		VREMINACTIVE("vref", vp);
	simple_unlock(&vnode_free_list_slock);

	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
	/*
	 * The interlock is not held and
	 * VOP_INACTIVE releases the vnode lock.
	 * We could block and the vnode might get reactivated.
	 * Can not just call vfree without checking the state.
	 */
	simple_lock(&vp->v_interlock);
	if (!VONLIST(vp)) {
		if (vp->v_usecount == 0)
			vfree(vp);
		else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
			vinactive(vp);
	}
	simple_unlock(&vp->v_interlock);
}
/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = current_proc();	/* XXX */
	funnel_t *curflock;
	extern int disable_funnel;

	if ((curflock = thread_funnel_get()) != kernel_flock &&
	    !(disable_funnel && curflock != THR_FUNNEL_NULL))
		panic("Entering vrele() without kernel funnel");

	simple_lock(&vp->v_interlock);
	if (--vp->v_usecount == 1) {
		if (UBCINFOEXISTS(vp)) {
			if ((vp->v_flag & VXLOCK) == 0)
				vinactive(vp);
			simple_unlock(&vp->v_interlock);
			return;
		}
	}
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}

	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}

	if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
		/* vnode is being cleaned, just return */
		vfree(vp);
		simple_unlock(&vp->v_interlock);
		return;
	}

	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
		VOP_INACTIVE(vp, p);
		/*
		 * vn_lock releases the interlock and
		 * VOP_INACTIVE releases the vnode lock.
		 * We could block and the vnode might get reactivated.
		 * Can not just call vfree without checking the state.
		 */
		simple_lock(&vp->v_interlock);
		if (!VONLIST(vp)) {
			if (vp->v_usecount == 0)
				vfree(vp);
			else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
				vinactive(vp);
		}
		simple_unlock(&vp->v_interlock);
	} else {
		simple_unlock(&vp->v_interlock);
		kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp);
	}
}
void
vagevp(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	vp->v_flag |= VAGE;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}
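
/*
 * Illustrative sketch (not part of the original file): vhold()/holdrele() are
 * a paired hold count used by page and buffer structures that point at a
 * vnode without taking a full use reference.  Every vhold() must eventually
 * be balanced by a holdrele(), as in the hypothetical helper below.
 */
#if 0	/* example only */
static void
example_briefly_hold(struct vnode *vp)
{
	vhold(vp);		/* pin the vnode structure */
	/* ... inspect vp while it cannot be freed out from under us ... */
	holdrele(vp);		/* drop the hold; panics if over-released */
}
#endif	/* example only */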
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = current_proc();
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM or VNOFLUSH.
		 */
		if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * Skip over vnodes marked VSWAP.
		 */
		if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
		if (busyprt)
			vprint("vflush: busy vnode", vp);
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy && ((flags & FORCECLOSE)==0))
		return (EBUSY);
	return (0);
}
/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;
	int didhold;

	/*
	 * if the vnode is not obtained by calling getnewvnode() we
	 * are not responsible for the cleanup. Just return.
	 */
	if (!(vp->v_flag & VSTANDARD)) {
		simple_unlock(&vp->v_interlock);
		return;
	}

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount) {
		/*
		 * active vnode can not be on the free list.
		 * we are about to take an extra reference on this vnode
		 * do the queue management as needed
		 * Not doing so can cause "still on list" or
		 * "vnreclaim: v_usecount" panic if VOP_LOCK() blocks.
		 */
		simple_lock(&vnode_free_list_slock);
		if (ISSET((vp)->v_flag, VUINACTIVE))
			VREMINACTIVE("vclean", vp);
		simple_unlock(&vnode_free_list_slock);

		if (++vp->v_usecount <= 0)
			panic("vclean: v_usecount");
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * While blocked in VOP_LOCK() someone could have dropped
	 * reference[s] and we could land on the inactive list.
	 * if this vnode is on the inactive list
	 * take it off the list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (ISSET((vp)->v_flag, VUINACTIVE))
		VREMINACTIVE("vclean", vp);
	simple_unlock(&vnode_free_list_slock);

	/* Clean the pages in VM. */
	if (active && (flags & DOCLOSE))
		VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);

	/* Clean the pages in VM. */
	didhold = ubc_hold(vp);
	if ((active) && (didhold))
		(void)ubc_clean(vp, 0); /* do not invalidate */

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE) {
		if (vp->v_tag == VT_NFS)
			nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
		else
			vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	}

	if (active)
		VOP_INACTIVE(vp, p);
	else
		VOP_UNLOCK(vp, 0, p);

	/* Destroy ubc named reference */
	if (didhold) {
		ubc_rele(vp);
		ubc_destroy_named(vp);
	}

	/*
	 * Make sure vp isn't on the inactive list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (ISSET((vp)->v_flag, VUINACTIVE)) {
		VREMINACTIVE("vclean", vp);
	}
	simple_unlock(&vnode_free_list_slock);

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	// make sure the name & parent ptrs get cleaned out!
	clean_up_name_parent_ptrs(vp);

	if (vp->v_vnlock) {
		struct lock__bsd__ *tmp = vp->v_vnlock;
		if ((tmp->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		vp->v_vnlock = NULL;
	}

	/* It's dead, Jim! */
	vp->v_op = dead_vnodeop_p;

	insmntque(vp, (struct mount *)0);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}

	if (active)
		vrele(vp);
}
/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = current_proc();

	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			while (vp->v_flag & VXLOCK) {
				vp->v_flag |= VXWANT;
				simple_unlock(&vp->v_interlock);
				(void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			}
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
	return (0);
}
/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct slock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = current_proc();

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * if the vnode is not obtained by calling getnewvnode() we
	 * are not responsible for the cleanup. Just return.
	 */
	if (!(vp->v_flag & VSTANDARD)) {
		simple_unlock(&vp->v_interlock);
		return;
	}

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		while (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			(void)tsleep((caddr_t)vp, PINOD, "vgone", 0);
		}
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		{
		struct specinfo *tmp = vp->v_specinfo;
		vp->v_specinfo = NULL;
		FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO);
		}
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 && (vp->v_flag & VUINACTIVE) == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}
/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}
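
/*
 * Illustrative sketch (not part of the original file): vfinddev() answers
 * "is there already a vnode for this device number and type?".  The helper
 * below is hypothetical and only demonstrates the out-parameter convention.
 */
#if 0	/* example only */
static int
example_device_is_known(dev_t dev)
{
	struct vnode *vp;

	/* returns non-zero and fills in vp when a matching vnode exists */
	return (vfinddev(dev, VBLK, &vp));
}
#endif	/* example only */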
/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
int prtactive = 0;	/* 1 => print out reclaim of active vnodes */

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
	{ "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VNOFLUSH)
		strcat(buf, "|VNOFLUSH");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = current_proc();
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
__private_extern__ int
build_path(struct vnode *vp, char *buff, int buflen, int *outlen)
{
	char *end, *str;
	int i, len, ret = 0, counter = 0;

	end = &buff[buflen-1];
	*end = '\0';

	while (vp && VPARENT(vp) != vp) {
		// the maximum depth of a file system hierarchy is MAXPATHLEN/2
		// (with single-char names separated by slashes).  we panic if
		// we've ever looped more than that.
		if (counter++ > MAXPATHLEN/2) {
			panic("build_path: vnode parent chain is too long! vp 0x%x\n", vp);
		}

		if (VNAME(vp) == NULL) {
			if (VPARENT(vp) != NULL) {
				ret = EINVAL;
			}
			break;
		}

		// count how long the string is
		for (len = 0, str = VNAME(vp); *str; str++, len++)
			;

		// check that there's enough space
		if ((end - buff) < len) {
			ret = ENOSPC;
			break;
		}

		// copy it backwards
		for (; len > 0; len--) {
			*--end = *--str;
		}

		// put in the path separator
		*--end = '/';

		// walk up the chain.
		vp = VPARENT(vp);

		// check if we're crossing a mount point and
		// switch the vp if we are.
		if (vp && (vp->v_flag & VROOT)) {
			vp = vp->v_mount->mnt_vnodecovered;
		}
	}

	// slide it down to the beginning of the buffer
	memmove(buff, end, &buff[buflen] - end);

	*outlen = &buff[buflen] - end;

	return ret;
}

__private_extern__ int
vn_getpath(struct vnode *vp, char *pathbuf, int *len)
{
	return build_path(vp, pathbuf, *len, len);
}
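
/*
 * Illustrative sketch (not part of the original file): vn_getpath() wraps
 * build_path() and takes a buffer plus an in/out length.  The helper below
 * is hypothetical and assumes MAXPATHLEN-sized storage.
 */
#if 0	/* example only */
static void
example_log_vnode_path(struct vnode *vp)
{
	char path[MAXPATHLEN];
	int len = sizeof(path);

	if (vn_getpath(vp, path, &len) == 0)
		printf("vnode 0x%x is %s (%d bytes)\n", (unsigned int)vp, path, len);
}
#endif	/* example only */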
/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct vfsconf *vfsp;
	int *username;
	u_int usernamelen;
	int error;

	/*
	 * VFS_NUMMNTOPS shouldn't be at name[0] since it
	 * is a VFS generic variable. So now we must check
	 * namelen so we don't end up covering any UFS
	 * variables (since UFS vfc_typenum is 1).
	 *
	 * It should have been:
	 *    name[0]:  VFS_GENERIC
	 *    name[1]:  VFS_NUMMNTOPS
	 */
	if (namelen == 1 && name[0] == VFS_NUMMNTOPS) {
		extern unsigned int vfs_nummntops;
		return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
	}

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (EISDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
			oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
			sizeof(struct vfsconf)));
	}
	/*
	 * We need to get back into the general MIB, so we need to re-prepend
	 * CTL_VFS to our name and try userland_sysctl().
	 */
	usernamelen = namelen + 1;
	MALLOC(username, int *, usernamelen * sizeof(*username),
		M_TEMP, M_WAITOK);
	bcopy(name, username + 1, namelen * sizeof(*name));
	username[0] = CTL_VFS;
	error = userland_sysctl(p, username, usernamelen, oldp, oldlenp, 1,
		newp, newlen, oldlenp);
	FREE(username, M_TEMP);
	return (error);
}
int kinfo_vdebug = 1;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) {
				return (error);
			}
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}
/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}
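
/*
 * Illustrative sketch (not part of the original file): mount-time code uses
 * vfs_mountedon() to refuse to mount a block device (or any of its aliases)
 * that already has a filesystem mounted on it.  The helper is hypothetical.
 */
#if 0	/* example only */
static int
example_check_mountable(struct vnode *devvp)
{
	int error;

	if ((error = vfs_mountedon(devvp)) != 0)
		return (error);		/* EBUSY: device already in use */
	return (0);
}
#endif	/* example only */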
/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
__private_extern__ void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p = current_proc();

	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		(void) dounmount(mp, MNT_FORCE, p);
	}
}
/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by vfs_export() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0) {
		/*
		 * One of the reasons that rnh_addaddr may fail is that
		 * the entry already exists. To check for this case, we
		 * look up the entry to see if it is there. If so, we
		 * do not need to make a new entry but do return success.
		 */
		_FREE(np, M_NETADDR);
		rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
		if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
		    ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
		    !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
			(caddr_t)&argp->ex_anon, sizeof(struct ucred)))
			return (0);
		return (EPERM);
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	_FREE(np, M_NETADDR);
	return (error);
}
/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	_FREE((caddr_t)rn, M_NETADDR);
	return (0);
}
/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
				(caddr_t)rnh);
			_FREE((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
						rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}
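
/*
 * Illustrative sketch (not part of the original file): an NFS-style server
 * asks vfs_export_lookup() for the export credentials that apply to a client
 * address before honouring a request.  The helper below is hypothetical.
 */
#if 0	/* example only */
static int
example_client_may_access(struct mount *mp, struct netexport *nep, struct mbuf *nam)
{
	struct netcred *np;

	np = vfs_export_lookup(mp, nep, nam);
	if (np == NULL)
		return (EACCES);	/* host not in the export list */
	return ((np->netc_exflags & MNT_EXRDONLY) ? EROFS : 0);
}
#endif	/* example only */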
/*
 * try to reclaim vnodes from the memory
 * object cache
 */
static int
vm_object_cache_reclaim(int count)
{
	int cnt;
	void vnode_pager_release_from_cache(int *);

	/* attempt to reclaim vnodes from VM object cache */
	cnt = count;
	vnode_pager_release_from_cache(&cnt);
	return (cnt);
}
/*
 * Release memory object reference held by inactive vnodes
 * and then try to reclaim some vnodes from the memory
 * object cache
 */
static int
vnreclaim(int count)
{
	int i, loopcnt;
	struct proc *p = current_proc();
	struct vnode *vp;
	int err;
	int didhold;

	i = 0;
	loopcnt = 0;

restart:
	/* Try to release "count" vnodes from the inactive list */
	if (++loopcnt > inactivevnodes) {
		/*
		 * I did my best trying to reclaim the vnodes.
		 * Do not try any more as that would only lead to
		 * long latencies. Also in the worst case
		 * this can get totally CPU bound.
		 * Just fall through and attempt a reclaim of VM
		 * object cache
		 */
		goto out;
	}

	simple_lock(&vnode_free_list_slock);
	for (vp = TAILQ_FIRST(&vnode_inactive_list);
	    (vp != NULLVP) && (i < count);
	    vp = TAILQ_NEXT(vp, v_freelist)) {

		if (!simple_lock_try(&vp->v_interlock))
			continue;

		if (vp->v_usecount != 1)
			panic("vnreclaim: v_usecount");

		if (!UBCINFOEXISTS(vp)) {
			if (vp->v_type == VBAD) {
				VREMINACTIVE("vnreclaim", vp);
				simple_unlock(&vp->v_interlock);
				continue;
			} else
				panic("non UBC vnode on inactive list");
			/* Should not reach here */
		}

		/* If vnode is already being reclaimed, wait */
		if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			simple_unlock(&vnode_free_list_slock);
			(void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
			goto restart;
		}

		/*
		 * if the vnode is being initialized,
		 * wait for it to finish initialization
		 */
		if (ISSET(vp->v_flag, VUINIT)) {
			SET(vp->v_flag, VUWANT);
			simple_unlock(&vp->v_interlock);
			continue;
		}

		VREMINACTIVE("vnreclaim", vp);
		simple_unlock(&vnode_free_list_slock);

		if (ubc_issetflags(vp, UI_WASMAPPED)) {
			/*
			 * We should not reclaim as it is likely
			 * to be in use. Let it die a natural death.
			 * Release the UBC reference if one exists
			 * and put it back at the tail.
			 */
			simple_unlock(&vp->v_interlock);
			if (ubc_release_named(vp)) {
				if (UBCINFOEXISTS(vp)) {
					simple_lock(&vp->v_interlock);
					if (vp->v_usecount == 1 && !VONLIST(vp))
						vinactive(vp);
					simple_unlock(&vp->v_interlock);
				}
			} else {
				simple_lock(&vp->v_interlock);
				vinactive(vp);
				simple_unlock(&vp->v_interlock);
			}
			goto restart;
		}

		VORECLAIM_ENABLE(vp);

		/*
		 * scrub the dirty pages and invalidate the buffers
		 */
		err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
		if (err) {
			/* cannot reclaim */
			simple_lock(&vp->v_interlock);
			vinactive(vp);
			VORECLAIM_DISABLE(vp);
			i++;
			simple_unlock(&vp->v_interlock);
			goto restart;
		}

		/* keep the vnode alive so we can kill it */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount != 1)
			panic("VOCR: usecount race");
		vp->v_usecount++;
		simple_unlock(&vp->v_interlock);

		/* clean up the state in VM without invalidating */
		didhold = ubc_hold(vp);
		if (didhold)
			(void)ubc_clean(vp, 0);

		/* flush and invalidate buffers associated with the vnode */
		if (vp->v_tag == VT_NFS)
			nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
		else
			vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

		/*
		 * Note: for the v_usecount == 2 case, VOP_INACTIVE
		 * has not yet been called.  Call it now while vp is
		 * still locked, it will also release the lock.
		 */
		if (vp->v_usecount == 2)
			VOP_INACTIVE(vp, p);
		else
			VOP_UNLOCK(vp, 0, p);

		if (didhold)
			ubc_rele(vp);

		/*
		 * destroy the ubc named reference.
		 * If we can't because it is held for I/Os
		 * in progress, just put it back on the inactive
		 * list and move on.  Otherwise, the paging reference
		 * is toast (and so is this vnode?).
		 */
		if (ubc_destroy_named(vp)) {
			i++;
		}
		simple_lock(&vp->v_interlock);
		VORECLAIM_DISABLE(vp);
		simple_unlock(&vp->v_interlock);
		vrele(vp);	/* release extra use we added here */

		/* inactive list lock was released, must restart */
		goto restart;
	}
	simple_unlock(&vnode_free_list_slock);

out:
	vnode_reclaim_tried += i;

	i = vm_object_cache_reclaim(count);
	vnode_objects_reclaimed += i;

	return (i);
}
/*
 * This routine is called from vnode_pager_no_senders()
 * which in turn can be called with vnode locked by vnode_uncache()
 * But it could also get called as a result of vm_object_cache_trim().
 * In that case lock state is unknown.
 * AGE the vnode so that it gets recycled quickly.
 * Check lock status to decide whether to call vput() or vrele().
 */
__private_extern__
void
vnode_pager_vrele(struct vnode *vp)
{
	boolean_t funnel_state;
	int isvnreclaim = 1;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	/* Mark the vnode to be recycled */
	vagevp(vp);

	simple_lock(&vp->v_interlock);
	/*
	 * If a vgone (or vclean) is already in progress,
	 * Do not bother with the ubc_info cleanup.
	 * Let the vclean deal with it.
	 */
	if (vp->v_flag & VXLOCK) {
		CLR(vp->v_flag, VTERMINATE);
		if (ISSET(vp->v_flag, VTERMWANT)) {
			CLR(vp->v_flag, VTERMWANT);
			wakeup((caddr_t)&vp->v_ubcinfo);
		}
		simple_unlock(&vp->v_interlock);
		(void) thread_funnel_set(kernel_flock, funnel_state);
		return;
	}

	/* It's dead, Jim! */
	if (!ISSET(vp->v_flag, VORECLAIM)) {
		/*
		 * called as a result of eviction of the memory
		 * object from the memory object cache
		 */
		isvnreclaim = 0;

		/* So serialize vnode operations */
		VORECLAIM_ENABLE(vp);
	}
	if (!ISSET(vp->v_flag, VTERMINATE))
		SET(vp->v_flag, VTERMINATE);

	if (UBCINFOEXISTS(vp)) {
		struct ubc_info *uip = vp->v_ubcinfo;

		if (ubc_issetflags(vp, UI_WASMAPPED))
			SET(vp->v_flag, VWASMAPPED);

		vp->v_ubcinfo = UBC_NOINFO;	/* catch bad accesses */
		simple_unlock(&vp->v_interlock);
		ubc_info_deallocate(uip);
	} else if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
	    && ((vp)->v_ubcinfo != UBC_NOINFO)) {
		struct ubc_info *uip = vp->v_ubcinfo;

		vp->v_ubcinfo = UBC_NOINFO;	/* catch bad accesses */
		simple_unlock(&vp->v_interlock);
		ubc_info_deallocate(uip);
	} else {
		simple_unlock(&vp->v_interlock);
	}

	CLR(vp->v_flag, VTERMINATE);

	if (vp->v_type != VBAD){
		vgone(vp);	/* revoke the vnode */
		vrele(vp);	/* and drop the reference */
	}

	if (ISSET(vp->v_flag, VTERMWANT)) {
		CLR(vp->v_flag, VTERMWANT);
		wakeup((caddr_t)&vp->v_ubcinfo);
	}
	if (!isvnreclaim)
		VORECLAIM_DISABLE(vp);
	(void) thread_funnel_set(kernel_flock, funnel_state);
	return;
}
#if DIAGNOSTIC
int walk_vnodes_debug = 0;

void
walk_allvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;
	int cnt;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (vp->v_usecount < 0){
				if(walk_vnodes_debug) {
					printf("vp is %x\n",vp);
				}
			}
		}
		nmp = mp->mnt_list.cqe_next;
	}
	for (cnt = 0, vp = vnode_free_list.tqh_first;
	     vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
		if ((vp->v_usecount < 0) && walk_vnodes_debug) {
			if(walk_vnodes_debug) {
				printf("vp is %x\n",vp);
			}
		}
	}
	printf("%d - free\n", cnt);

	for (cnt = 0, vp = vnode_inactive_list.tqh_first;
	     vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
		if ((vp->v_usecount < 0) && walk_vnodes_debug) {
			if(walk_vnodes_debug) {
				printf("vp is %x\n",vp);
			}
		}
	}
	printf("%d - inactive\n", cnt);
}
#endif /* DIAGNOSTIC */
struct x_constraints {
	u_int32_t x_maxreadcnt;
	u_int32_t x_maxsegreadsize;
	u_int32_t x_maxsegwritesize;
};
void
vfs_io_attributes(vp, flags, iosize, vectors)
	struct vnode	*vp;
	int	flags;	/* B_READ or B_WRITE */
	int	*iosize;
	int	*vectors;
{
	struct mount *mp;

	/* start with "reasonable" defaults */
	*iosize = MAXPHYS;
	*vectors = 32;

	mp = vp->v_mount;
	if (mp != NULL) {
		switch (flags) {
		case B_READ:
			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
				*iosize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt;
			else
				*iosize = mp->mnt_maxreadcnt;
			*vectors = mp->mnt_segreadcnt;
			break;
		case B_WRITE:
			*iosize = mp->mnt_maxwritecnt;
			*vectors = mp->mnt_segwritecnt;
			break;
		default:
			break;
		}
	}
	if (*iosize == 0)
		*iosize = MAXPHYS;
	return;
}
void
vfs_io_maxsegsize(vp, flags, maxsegsize)
	struct vnode	*vp;
	int	flags;	/* B_READ or B_WRITE */
	int	*maxsegsize;
{
	struct mount *mp;

	/* start with "reasonable" default */
	*maxsegsize = MAXPHYS;

	mp = vp->v_mount;
	if (mp != NULL) {
		switch (flags) {
		case B_READ:
			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
				*maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize;
			else
				/*
				 * if the extended info doesn't exist
				 * then use the maxread I/O size as the
				 * max segment size... this is the previous behavior
				 */
				*maxsegsize = mp->mnt_maxreadcnt;
			break;
		case B_WRITE:
			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
				*maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize;
			else
				/*
				 * if the extended info doesn't exist
				 * then use the maxwrite I/O size as the
				 * max segment size... this is the previous behavior
				 */
				*maxsegsize = mp->mnt_maxwritecnt;
			break;
		default:
			break;
		}
	}
	if (*maxsegsize == 0)
		*maxsegsize = MAXPHYS;
	return;
}
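
/*
 * Illustrative sketch (not part of the original source): how a cluster I/O
 * or filesystem read path might consult the two routines above to size a
 * request.  The variable names are hypothetical; only the two calls and the
 * B_READ flag are taken from this file.
 */
#if 0
	int max_iosize, max_vectors, max_segsize;

	vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);
	vfs_io_maxsegsize(vp, B_READ, &max_segsize);

	/*
	 * issue reads of at most max_iosize bytes, built from at most
	 * max_vectors segments, none larger than max_segsize bytes
	 */
#endif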
#include <sys/disk.h>

int
vfs_init_io_attributes(devvp, mp)
	struct vnode	*devvp;
	struct mount	*mp;
{
	int error;
	off_t readblockcnt;
	off_t writeblockcnt;
	off_t readmaxcnt;
	off_t writemaxcnt;
	off_t readsegcnt;
	off_t writesegcnt;
	off_t readsegsize;
	off_t writesegsize;
	u_long blksize;
	u_int64_t temp;
	int isvirtual = 0;
	int thisunit = -1;
	struct proc *p = current_proc();
	struct ucred *cred = p->p_ucred;
	/*
	 * determine if this mount point exists on the same device as the root
	 * partition... if so, then it comes under the hard throttle control
	 */
	static int rootunit = -1;
	extern struct vnode *rootvp;

	if (rootunit == -1) {
		if (VOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, cred, p))
			rootunit = -1;
		else if (rootvp == devvp)
			mp->mnt_kern_flag |= MNTK_ROOTDEV;
	}
	if (devvp != rootvp && rootunit != -1) {
		if (VOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, cred, p) == 0) {
			if (thisunit == rootunit)
				mp->mnt_kern_flag |= MNTK_ROOTDEV;
		}
	}
	if (VOP_IOCTL(devvp, DKIOCGETISVIRTUAL, (caddr_t)&isvirtual, 0, cred, p) == 0) {
		if (isvirtual)
			mp->mnt_kern_flag |= MNTK_VIRTUALDEV;
	}

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
	    (caddr_t)&readblockcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
	    (caddr_t)&writeblockcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD,
	    (caddr_t)&readmaxcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE,
	    (caddr_t)&writemaxcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
	    (caddr_t)&readsegcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
	    (caddr_t)&writesegcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD,
	    (caddr_t)&readsegsize, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE,
	    (caddr_t)&writesegsize, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
	    (caddr_t)&blksize, 0, cred, p)))
		return (error);

	if ( !(mp->mnt_kern_flag & MNTK_IO_XINFO)) {
		MALLOC(mp->mnt_xinfo_ptr, void *, sizeof(struct x_constraints), M_TEMP, M_WAITOK);
		mp->mnt_kern_flag |= MNTK_IO_XINFO;
	}

	if (readmaxcnt)
		temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt;
	else if (readblockcnt) {
		temp = readblockcnt * blksize;
		temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
	} else
		temp = MAXPHYS;
	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt = (u_int32_t)temp;

	if (writemaxcnt)
		temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt;
	else if (writeblockcnt) {
		temp = writeblockcnt * blksize;
		temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
	} else
		temp = MAXPHYS;
	mp->mnt_maxwritecnt = (u_int32_t)temp;

	if (readsegcnt) {
		temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
		mp->mnt_segreadcnt = (u_int16_t)temp;
	}
	if (writesegcnt) {
		temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
		mp->mnt_segwritecnt = (u_int16_t)temp;
	}
	if (readsegsize)
		temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize;
	else
		temp = mp->mnt_maxreadcnt;
	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize = (u_int32_t)temp;

	if (writesegsize)
		temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize;
	else
		temp = mp->mnt_maxwritecnt;
	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize = (u_int32_t)temp;

	return (error);
}
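
/*
 * Illustrative sketch (not part of the original source): a filesystem's
 * mount routine would typically call vfs_init_io_attributes() against the
 * opened block device vnode so that the constraints gathered above become
 * available to vfs_io_attributes()/vfs_io_maxsegsize().  The surrounding
 * names and the error exit are hypothetical.
 */
#if 0
	/* devvp: the opened block device, mp: the mount being set up */
	if ((error = vfs_init_io_attributes(devvp, mp)))
		goto bad;	/* hypothetical error exit */
#endif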
static struct klist fs_klist;

void
vfs_event_init(void)
{
	klist_init(&fs_klist);
}

void
vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data)
{
	KNOTE(&fs_klist, event);
}
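
/*
 * Illustrative sketch (not part of the original source): a filesystem or
 * the unmount path could post a notification to EVFILT_FS listeners through
 * vfs_event_signal().  VQ_UNMOUNT is assumed to be one of the VQ_* event
 * bits from <sys/mount.h>; substitute whichever event applies.
 */
#if 0
	vfs_event_signal(&mp->mnt_stat.f_fsid, VQ_UNMOUNT, 0);
#endif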
/*
 * return the number of mounted filesystems.
 */
static int
sysctl_vfs_getvfscnt(void)
{
	int ret = 0;
	struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
		ret++;
	simple_unlock(&mountlist_slock);
	return (ret);
}
/*
 * fill in the array of fsid_t's up to a max of 'count', the actual
 * number filled in will be set in '*actual'.  If there are more fsid_t's
 * than room in fsidlst then ENOMEM will be returned and '*actual' will
 * have the actual count.
 * having *actual filled out even in the error case is depended upon.
 */
static int
sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual)
{
	struct mount *mp;

	*actual = 0;
	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		(*actual)++;
		if (*actual <= count)
			fsidlst[(*actual) - 1] = mp->mnt_stat.f_fsid;
	}
	simple_unlock(&mountlist_slock);
	return (*actual <= count ? 0 : ENOMEM);
}
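
/*
 * Illustrative sketch (not part of the original source): userland usage of
 * the vfs.generic.vfsidlist node exported below.  This assumes the node can
 * be resolved by name with sysctlbyname(3); the sizing call followed by the
 * data call mirrors the handler's "query, then copy out" behavior.
 */
#if 0
	/* userland, not kernel code */
	size_t len = 0;
	fsid_t *ids;

	if (sysctlbyname("vfs.generic.vfsidlist", NULL, &len, NULL, 0) == 0 &&
	    (ids = malloc(len)) != NULL &&
	    sysctlbyname("vfs.generic.vfsidlist", ids, &len, NULL, 0) == 0) {
		int nmounts = len / sizeof(fsid_t);
		/* ... use ids[0..nmounts-1] ... */
	}
#endif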
static int
sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS
{
	int actual, error;
	size_t space;
	fsid_t *fsidlst;

	/* This is a readonly node. */
	if (req->newptr != NULL)
		return (EPERM);

	/* they are querying us so just return the space required. */
	if (req->oldptr == NULL) {
		req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
		return (0);
	}
again:
	/*
	 * Retrieve an accurate count of the amount of space required to copy
	 * out all the fsids in the system.
	 */
	space = req->oldlen;
	req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t);

	/* they didn't give us enough space. */
	if (space < req->oldlen)
		return (ENOMEM);
	MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK);
	error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t),
	    &actual);
	/*
	 * If we get back ENOMEM, then another mount has been added while we
	 * slept in malloc above.  If this is the case then try again.
	 */
	if (error == ENOMEM) {
		FREE(fsidlst, M_TEMP);
		req->oldlen = space;
		goto again;
	}
	if (error == 0)
		error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t));
	FREE(fsidlst, M_TEMP);
	return (error);
}
/*
 * Do a sysctl by fsid.
 */
static int
sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS
{
	struct vfsidctl vc;
	struct mount *mp;
	struct statfs *sp;
	struct proc *p;
	int *name;
	int error, flags, namelen;

	name = arg1;
	namelen = arg2;
	p = req->p;

	error = SYSCTL_IN(req, &vc, sizeof(vc));
	if (error)
		return (error);
	if (vc.vc_vers != VFS_CTL_VERS1)
		return (EINVAL);
	mp = vfs_getvfs(&vc.vc_fsid);
	if (mp == NULL)
		return (ENOENT);
	/* reset so that the fs specific code can fetch it. */
	req->newidx = 0;
	/*
	 * Note if this is a VFS_CTL then we pass the actual sysctl req
	 * in for "oldp" so that the lower layer can DTRT and use the
	 * SYSCTL_IN/OUT routines.
	 */
	if (mp->mnt_op->vfs_sysctl != NULL) {
		error = mp->mnt_op->vfs_sysctl(name, namelen,
		    req, NULL, NULL, 0, req->p);
		if (error != EOPNOTSUPP)
			return (error);
	}
	switch (name[0]) {
	case VFS_CTL_UMOUNT:
		VCTLTOREQ(&vc, req);
		error = SYSCTL_IN(req, &flags, sizeof(flags));
		if (error)
			break;
		error = safedounmount(mp, flags, p);
		break;
	case VFS_CTL_STATFS:
		VCTLTOREQ(&vc, req);
		error = SYSCTL_IN(req, &flags, sizeof(flags));
		if (error)
			break;
		sp = &mp->mnt_stat;
		if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, p)))
			return (error);
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		error = SYSCTL_OUT(req, sp, sizeof(*sp));
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (error);
}
static int	filt_fsattach(struct knote *kn);
static void	filt_fsdetach(struct knote *kn);
static int	filt_fsevent(struct knote *kn, long hint);

struct filterops fs_filtops =
	{ 0, filt_fsattach, filt_fsdetach, filt_fsevent };

static int
filt_fsattach(struct knote *kn)
{

	kn->kn_flags |= EV_CLEAR;
	KNOTE_ATTACH(&fs_klist, kn);
	return (0);
}

static void
filt_fsdetach(struct knote *kn)
{

	KNOTE_DETACH(&fs_klist, kn);
}

static int
filt_fsevent(struct knote *kn, long hint)
{

	kn->kn_fflags |= hint;
	return (kn->kn_fflags != 0);
}
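
/*
 * Illustrative sketch (not part of the original source): userland code can
 * watch for filesystem events delivered through fs_klist by registering an
 * EVFILT_FS knote; kn_fflags accumulates the event bits passed to
 * vfs_event_signal() above.
 */
#if 0
	/* userland, not kernel code */
	int kq = kqueue();
	struct kevent kev;

	EV_SET(&kev, 0, EVFILT_FS, EV_ADD | EV_CLEAR, 0, 0, NULL);
	(void)kevent(kq, &kev, 1, NULL, 0, NULL);	/* register interest */

	(void)kevent(kq, NULL, 0, &kev, 1, NULL);	/* kev.fflags holds the posted event bits */
#endif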
static int
sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS
{
	int out, error;
	pid_t pid;
	struct proc *p;

	/* We need a pid. */
	if (req->newptr == NULL)
		return (EINVAL);

	error = SYSCTL_IN(req, &pid, sizeof(pid));
	if (error)
		return (error);

	p = pfind(pid < 0 ? -pid : pid);
	if (p == NULL)
		return (ESRCH);

	/*
	 * Fetching the value is ok, but we only fetch if the old
	 * pointer is given.
	 */
	if (req->oldptr != NULL) {
		out = !((p->p_flag & P_NOREMOTEHANG) == 0);
		error = SYSCTL_OUT(req, &out, sizeof(out));
		if (error)
			return (error);
	}

	/* cansignal offers us enough security. */
	if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0)
		return (EPERM);

	if (pid < 0)
		p->p_flag &= ~P_NOREMOTEHANG;
	else
		p->p_flag |= P_NOREMOTEHANG;

	return (0);
}
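
/*
 * Illustrative sketch (not part of the original source): userland usage of
 * the vfs.generic.noremotehang node declared below.  A positive pid sets
 * P_NOREMOTEHANG for that process, a negative pid clears it.  This assumes
 * the node can be resolved by name with sysctlbyname(3).
 */
#if 0
	/* userland, not kernel code */
	pid_t pid = getpid();	/* use -pid to clear the flag again */

	(void)sysctlbyname("vfs.generic.noremotehang", NULL, NULL,
	    &pid, sizeof(pid));
#endif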
/* the vfs.generic. branch. */
SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge");
/* retrieve a list of mounted filesystem fsid_t */
SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD,
    0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids");
/* perform operations on filesystem via fsid_t */
SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW,
    sysctl_vfs_ctlbyfsid, "ctlbyfsid");
SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW,
    0, 0, sysctl_vfs_noremotehang, "I", "noremotehang");