/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* External virtual filesystem routines
*/
+#undef DIAGNOSTIC
#define DIAGNOSTIC 1
#include <sys/param.h>
#include <sys/ubc.h>
#include <sys/vm.h>
#include <sys/sysctl.h>
+#include <sys/filedesc.h>
+#include <sys/event.h>
+
+#include <string.h>
+#include <machine/spl.h>
+
#include <kern/assert.h>
#include <miscfs/specfs/specdev.h>
+#include <mach/mach_types.h>
+#include <mach/memory_object_types.h>
+
+
enum vtype iftovt_tab[16] = {
VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
static void vfree(struct vnode *vp);
static void vinactive(struct vnode *vp);
-extern int vnreclaim(int count);
+static int vnreclaim(int count);
extern kern_return_t
adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
-/*
- * Insq/Remq for the vnode usage lists.
- */
-#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
-#define bufremvn(bp) { \
- LIST_REMOVE(bp, b_vnbufs); \
- (bp)->b_vnbufs.le_next = NOLIST; \
-}
-
TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */
struct mntlist mountlist; /* mounted filesystem list */
#define VORECLAIM_ENABLE(vp) \
do { \
if (ISSET((vp)->v_flag, VORECLAIM)) \
- panic("vm object raclaim already"); \
+ panic("vm_object_reclaim already"); \
SET((vp)->v_flag, VORECLAIM); \
} while(0)
* cache. Having too few vnodes on the free list causes serious disk
* thrashing as we cycle through them.
*/
-#define VNODE_FREE_MIN 100 /* freelist should have at least these many */
+#define VNODE_FREE_MIN 300 /* freelist should have at least this many */
/*
* We need to get vnodes back from the VM object cache when a certain #
/*
* Initialize the vnode management data structures.
*/
-void
+__private_extern__ void
vntblinit()
{
extern struct lock__bsd__ exchangelock;
}
/* Reset the VM Object Cache with the values passed in */
-kern_return_t
+__private_extern__ kern_return_t
reset_vmobjectcache(unsigned int val1, unsigned int val2)
{
vm_size_t oval = val1 - VNODE_FREE_MIN;
- vm_size_t nval = val2 - VNODE_FREE_MIN;
+ vm_size_t nval;
+
+ if(val2 < VNODE_FREE_MIN)
+ nval = 0;
+ else
+ nval = val2 - VNODE_FREE_MIN;
return(adjust_vm_object_cache(oval, nval));
}
return (ENODEV);
mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
bzero((char *)mp, (u_long)sizeof(struct mount));
+
+ /* Initialize the default IO constraints */
+ mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
+ mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
+
lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
(void)vfs_busy(mp, LK_NOWAIT, 0, p);
LIST_INIT(&mp->mnt_vnodelist);
register struct mount *mp;
simple_lock(&mountlist_slock);
- for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
- mp = mp->mnt_list.cqe_next) {
+ CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
simple_unlock(&mountlist_slock);
++xxxfs_mntid;
tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
tfsid.val[1] = mtype;
- if (mountlist.cqh_first != (void *)&mountlist) {
+ if (!CIRCLEQ_EMPTY(&mountlist)) {
while (vfs_getvfs(&tfsid)) {
tfsid.val[0]++;
xxxfs_mntid++;
simple_unlock(&vp->v_interlock);
reclaimhits++;
} else
- break;
- }
+ break;
+ }
}
/*
else
vp->v_ubcinfo = 0;
+ if (vp->v_flag & VHASDIRTY)
+ cluster_release(vp);
+
+ // make sure all these fields are cleared out as the
+ // name/parent stuff uses them and assumes they're
+ // cleared to null/0.
+ if (vp->v_scmap != NULL) {
+ panic("getnewvnode: vp @ 0x%x has non-null scmap.\n", vp);
+ }
+ vp->v_un.vu_name = NULL;
+ vp->v_scdirty = 0;
+ vp->v_un1.v_cl.v_pad = 0;
+
+
vp->v_lastr = -1;
vp->v_ralen = 0;
vp->v_maxra = 0;
- vp->v_lastw = 0;
vp->v_ciosiz = 0;
- vp->v_cstart = 0;
vp->v_clen = 0;
vp->v_socket = 0;
+ /* we may have blocked, re-evaluate state */
+ simple_lock(&vnode_free_list_slock);
+ if (VONLIST(vp)) {
+ if (vp->v_usecount == 0)
+ VREMFREE("getnewvnode", vp);
+ else if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("getnewvnode", vp);
+ }
+ simple_unlock(&vnode_free_list_slock);
+
done:
vp->v_flag = VSTANDARD;
vp->v_type = VNON;
simple_unlock(&mntvnode_slock);
}
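+/*
+ * Decrement the vnode's count of in-progress output operations and
+ * wake up any thread sleeping in VBWAIT or VTHROTTLED once it drains.
+ */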
+__inline void
+vpwakeup(struct vnode *vp)
+{
+ if (vp) {
+ if (--vp->v_numoutput < 0)
+ panic("vpwakeup: neg numoutput");
+ if ((vp->v_flag & VBWAIT || vp->v_flag & VTHROTTLED)
+ && vp->v_numoutput <= 0) {
+ vp->v_flag &= ~(VBWAIT|VTHROTTLED);
+ wakeup((caddr_t)&vp->v_numoutput);
+ }
+ }
+}
+
/*
* Update outstanding I/O count and do wakeup if requested.
*/
vwakeup(bp)
register struct buf *bp;
{
- register struct vnode *vp;
-
CLR(bp->b_flags, B_WRITEINPROG);
- if (vp = bp->b_vp) {
- if (--vp->v_numoutput < 0)
- panic("vwakeup: neg numoutput");
- if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
- if (vp->v_numoutput < 0)
- panic("vwakeup: neg numoutput 2");
- vp->v_flag &= ~VBWAIT;
- wakeup((caddr_t)&vp->v_numoutput);
- }
- }
+ vpwakeup(bp->b_vp);
}
/*
if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
return (error);
}
- if (vp->v_dirtyblkhd.lh_first != NULL || (vp->v_flag & VHASDIRTY))
- panic("vinvalbuf: dirty bufs");
+ if (vp->v_dirtyblkhd.lh_first)
+ panic("vinvalbuf: dirty bufs (vp 0x%x, bp 0x%x)", vp, vp->v_dirtyblkhd.lh_first);
}
for (;;) {
- if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
+ if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
while (blist && blist->b_lblkno < 0)
blist = blist->b_vnbufs.le_next;
if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
for (bp = blist; bp; bp = nbp) {
nbp = bp->b_vnbufs.le_next;
- if (flags & V_SAVEMETA && bp->b_lblkno < 0)
+ if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
continue;
s = splbio();
if (ISSET(bp->b_flags, B_BUSY)) {
(void) VOP_BWRITE(bp);
break;
}
- SET(bp->b_flags, B_INVAL);
+
+ if (bp->b_flags & B_LOCKED) {
+ panic("vinvalbuf: bp @ 0x%x is locked!", bp);
+ break;
+ } else {
+ SET(bp->b_flags, B_INVAL);
+ }
brelse(bp);
}
}
return (0);
}
-/*
- * Associate a buffer with a vnode.
- */
-void
-bgetvp(vp, bp)
- register struct vnode *vp;
- register struct buf *bp;
-{
-
- if (bp->b_vp)
- panic("bgetvp: not free");
- VHOLD(vp);
- bp->b_vp = vp;
- if (vp->v_type == VBLK || vp->v_type == VCHR)
- bp->b_dev = vp->v_rdev;
- else
- bp->b_dev = NODEV;
- /*
- * Insert onto list for new vnode.
- */
- bufinsvn(bp, &vp->v_cleanblkhd);
-}
-
-/*
- * Disassociate a buffer from a vnode.
- */
-void
-brelvp(bp)
- register struct buf *bp;
-{
- struct vnode *vp;
-
- if (bp->b_vp == (struct vnode *) 0)
- panic("brelvp: NULL");
- /*
- * Delete from old vnode list, if on one.
- */
- if (bp->b_vnbufs.le_next != NOLIST)
- bufremvn(bp);
- vp = bp->b_vp;
- bp->b_vp = (struct vnode *) 0;
- HOLDRELE(vp);
-}
-
-/*
- * Reassign a buffer from one vnode to another.
- * Used to assign file specific control information
- * (indirect blocks) to the vnode to which they belong.
- */
-void
-reassignbuf(bp, newvp)
- register struct buf *bp;
- register struct vnode *newvp;
-{
- register struct buflists *listheadp;
-
- if (newvp == NULL) {
- printf("reassignbuf: NULL");
- return;
- }
- /*
- * Delete from old vnode list, if on one.
- */
- if (bp->b_vnbufs.le_next != NOLIST)
- bufremvn(bp);
- /*
- * If dirty, put on list of dirty buffers;
- * otherwise insert onto list of clean buffers.
- */
- if (ISSET(bp->b_flags, B_DELWRI))
- listheadp = &newvp->v_dirtyblkhd;
- else
- listheadp = &newvp->v_cleanblkhd;
- bufinsvn(bp, listheadp);
-}
-
/*
* Create a vnode for a block device.
* Used for root filesystem, argdev, and swap areas.
struct proc *p = current_proc(); /* XXX */
struct vnode *vp;
struct vnode **vpp;
- struct specinfo * bufhold;
- int buffree = 1;
+ struct specinfo *specinfop;
if (nvp->v_type != VBLK && nvp->v_type != VCHR)
return (NULLVP);
- bufhold = (struct specinfo *)_MALLOC_ZONE(sizeof(struct specinfo),
- M_VNODE, M_WAITOK);
+ MALLOC_ZONE(specinfop, struct specinfo *, sizeof(struct specinfo),
+ M_SPECINFO, M_WAITOK);
vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
simple_lock(&spechash_slock);
break;
}
if (vp == NULL || vp->v_tag != VT_NON) {
- nvp->v_specinfo = bufhold;
- buffree = 0; /* buffer used */
+ nvp->v_specinfo = specinfop;
+ specinfop = 0; /* buffer used */
bzero(nvp->v_specinfo, sizeof(struct specinfo));
nvp->v_rdev = nvp_rdev;
nvp->v_hashchain = vpp;
vp->v_tag = nvp->v_tag;
nvp->v_type = VNON;
insmntque(vp, mp);
- if (buffree)
- _FREE_ZONE((void *)bufhold, sizeof (struct specinfo), M_VNODE);
+ if (specinfop)
+ FREE_ZONE((void *)specinfop, sizeof(struct specinfo), M_SPECINFO);
return (vp);
}
/*
- * Grab a particular vnode from the free list, increment its
- * reference count and lock it. The vnode lock bit is set the
- * vnode is being eliminated in vgone. The process is awakened
- * when the transition is completed, and an error returned to
- * indicate that the vnode is no longer usable (possibly having
- * been changed to a new file system type).
+ * Get a reference on a particular vnode and lock it if requested.
+ * If the vnode was on the inactive list, remove it from the list.
+ * If the vnode was on the free list, remove it from the list and
+ * move it to the inactive list as needed.
+ * The vnode lock bit is set if the vnode is being eliminated in
+ * vgone. The process is awakened when the transition is completed,
+ * and an error returned to indicate that the vnode is no longer
+ * usable (possibly having been changed to a new file system type).
*/
int
vget(vp, flags, p)
struct proc *p;
{
int error = 0;
+ u_long vpid;
+
+ vpid = vp->v_id; // save off the original v_id
+
+retry:
/*
* If the vnode is in the process of being cleaned out for
if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
vp->v_flag |= VXWANT;
simple_unlock(&vp->v_interlock);
- tsleep((caddr_t)vp, PINOD, "vget", 0);
+ (void)tsleep((caddr_t)vp, PINOD, "vget", 0);
return (ENOENT);
}
if (ISSET(vp->v_flag, VTERMINATE)) {
SET(vp->v_flag, VTERMWANT);
simple_unlock(&vp->v_interlock);
- tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
+ (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vget1", 0);
return (ENOENT);
}
+ /*
+ * if the vnode is being initialized,
+ * wait for it to finish initialization
+ */
+ if (ISSET(vp->v_flag, VUINIT)) {
+ SET(vp->v_flag, VUWANT);
+ simple_unlock(&vp->v_interlock);
+ (void) tsleep((caddr_t)vp, PINOD, "vget2", 0);
+ goto retry;
+ }
+
simple_lock(&vnode_free_list_slock);
- /* If on the free list, remove it from there */
- if (vp->v_usecount == 0) {
- if (VONLIST(vp))
+ if (VONLIST(vp)) {
+ if (vp->v_usecount == 0)
VREMFREE("vget", vp);
- } else {
- /* If on the inactive list, remove it from there */
- if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
- if (VONLIST(vp))
- VREMINACTIVE("vget", vp);
- }
+ else if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("vget", vp);
}
-
- /* The vnode should not be on the inactive list here */
- VINACTIVECHECK("vget", vp, 0);
-
simple_unlock(&vnode_free_list_slock);
+
if (++vp->v_usecount <= 0)
panic("vget: v_usecount");
+ /*
+ * Recover named reference as needed
+ */
+ if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) {
+ simple_unlock(&vp->v_interlock);
+ if (ubc_getobject(vp, UBC_HOLDOBJECT) == MEMORY_OBJECT_CONTROL_NULL) {
+ error = ENOENT;
+ goto errout;
+ }
+ simple_lock(&vp->v_interlock);
+ }
+
if (flags & LK_TYPE_MASK) {
- if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) {
- /*
- * If the vnode was not active in the first place
- * must not call vrele() as VOP_INACTIVE() is not
- * required.
- * So inlined part of vrele() here.
- */
- simple_lock(&vp->v_interlock);
- if (--vp->v_usecount == 1) {
- if (UBCINFOEXISTS(vp)) {
- vinactive(vp);
- simple_unlock(&vp->v_interlock);
- return (error);
- }
- }
- if (vp->v_usecount > 0) {
- simple_unlock(&vp->v_interlock);
- return (error);
- }
- if (vp->v_usecount < 0)
- panic("vget: negative usecount (%d)", vp->v_usecount);
- vfree(vp);
- simple_unlock(&vp->v_interlock);
+ if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
+ goto errout;
+ if (vpid != vp->v_id) { // make sure it's still the same vnode
+ vput(vp);
+ return ENOENT;
}
- return (error);
+ return (0);
+ }
+
+ if ((flags & LK_INTERLOCK) == 0)
+ simple_unlock(&vp->v_interlock);
+
+ if (vpid != vp->v_id) { // make sure it's still the same vnode
+ vrele(vp);
+ return ENOENT;
}
+ return (0);
+
+errout:
+ simple_lock(&vp->v_interlock);
+
/*
- * If this is a valid UBC vnode, if usecount is 1 and if
- * this vnode was mapped in the past, it is likely
- * that ubc_info freed due to the memory object getting recycled.
- * Just re-initialize the ubc_info.
+ * we may have blocked. Re-evaluate the state
*/
- if ((vp->v_usecount == 1) && UBCISVALID(vp)) {
- if (UBCINFOMISSING(vp))
- panic("vget: lost ubc_info");
-
- if (ISSET(vp->v_flag, VTERMINATE)) {
- /*
- * vnode is being terminated.
- * wait for vnode_pager_no_senders() to clear
- * VTERMINATE
- */
- SET(vp->v_flag, VTERMWANT);
+ simple_lock(&vnode_free_list_slock);
+ if (VONLIST(vp)) {
+ if (vp->v_usecount == 0)
+ VREMFREE("vget", vp);
+ else if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("vget", vp);
+ }
+ simple_unlock(&vnode_free_list_slock);
+
+ /*
+ * If the vnode was not active in the first place we
+ * must not call vrele(), as VOP_INACTIVE() is not
+ * required.
+ * So the relevant part of vrele() is inlined here.
+ */
+ if (--vp->v_usecount == 1) {
+ if (UBCINFOEXISTS(vp)) {
+ vinactive(vp);
simple_unlock(&vp->v_interlock);
- tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
- /* return error */
- return (ENOENT);
+ return (error);
}
+ }
+ if (vp->v_usecount > 0) {
+ simple_unlock(&vp->v_interlock);
+ return (error);
+ }
+ if (vp->v_usecount < 0)
+ panic("vget: negative usecount (%d)", vp->v_usecount);
+ vfree(vp);
+ simple_unlock(&vp->v_interlock);
+ return (error);
+}
- if ((!UBCINFOEXISTS(vp)) && ISSET(vp->v_flag, VWASMAPPED)) {
- simple_unlock(&vp->v_interlock);
- ubc_info_init(vp);
- simple_lock(&vp->v_interlock);
- } else
- panic("vget: stolen ubc_info");
+/*
+ * Get a pager reference on the particular vnode.
+ *
+ * This is called from ubc_info_init() and it is assumed that
+ * the vnode is not on the free list.
+ * It is also assumed that the vnode is neither being recycled
+ * by vgonel nor being terminated by vnode_pager_vrele().
+ *
+ * The vnode interlock is NOT held by the caller.
+ */
+__private_extern__ int
+vnode_pager_vget(vp)
+ struct vnode *vp;
+{
+ simple_lock(&vp->v_interlock);
+
+ UBCINFOCHECK("vnode_pager_vget", vp);
- if (!ubc_issetflags(vp, UI_HASOBJREF))
- if (ubc_getobject(vp, (UBC_NOREACTIVATE|UBC_HOLDOBJECT)))
- panic("vget: null object");
+ if (ISSET(vp->v_flag, (VXLOCK|VORECLAIM|VTERMINATE)))
+ panic("%s: dying vnode", "vnode_pager_vget");
+
+ simple_lock(&vnode_free_list_slock);
+ /* The vnode should not be on the free list */
+ if (VONLIST(vp)) {
+ if (vp->v_usecount == 0)
+ panic("%s: still on list", "vnode_pager_vget");
+ else if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("vnode_pager_vget", vp);
}
-out:
- if ((flags & LK_INTERLOCK) == 0)
- simple_unlock(&vp->v_interlock);
+
+ /* The vnode should not be on the inactive list here */
+ simple_unlock(&vnode_free_list_slock);
+
+ /* After all those checks, now do the real work :-) */
+ if (++vp->v_usecount <= 0)
+ panic("vnode_pager_vget: v_usecount");
+ simple_unlock(&vp->v_interlock);
+
return (0);
}
if (vp->v_vnlock == NULL) {
if ((flags & LK_TYPE_MASK) == LK_DRAIN)
return (0);
- MALLOC_ZONE(vp->v_vnlock, struct lock__bsd__ *,
- sizeof(struct lock__bsd__), M_VNODE, M_WAITOK);
+ MALLOC(vp->v_vnlock, struct lock__bsd__ *,
+ sizeof(struct lock__bsd__), M_TEMP, M_WAITOK);
lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
}
switch (flags & LK_TYPE_MASK) {
panic("vref used where vget required");
/* If on the inactive list, remove it from there */
- if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
- if (VONLIST(vp)) {
- simple_lock(&vnode_free_list_slock);
- VREMINACTIVE("vref", vp);
- simple_unlock(&vnode_free_list_slock);
- }
- }
- /* The vnode should not be on the inactive list here */
- VINACTIVECHECK("vref", vp, 0);
+ simple_lock(&vnode_free_list_slock);
+ if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("vref", vp);
+ simple_unlock(&vnode_free_list_slock);
if (++vp->v_usecount <= 0)
panic("vref v_usecount");
simple_unlock(&vp->v_interlock);
}
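+/*
+ * Release the vnode's cached name string and parent vnode reference,
+ * clearing both fields first so that blocking in remove_name() or
+ * vrele() never exposes stale pointers.
+ */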
+static void
+clean_up_name_parent_ptrs(struct vnode *vp)
+{
+ if (VNAME(vp) || VPARENT(vp)) {
+ char *tmp1;
+ struct vnode *tmp2;
+
+ // do it this way so we don't block before clearing
+ // these fields.
+ tmp1 = VNAME(vp);
+ tmp2 = VPARENT(vp);
+ VNAME(vp) = NULL;
+ VPARENT(vp) = NULL;
+
+ if (tmp1) {
+ remove_name(tmp1);
+ }
+
+ if (tmp2) {
+ vrele(tmp2);
+ }
+ }
+}
+
+
/*
* put the vnode on appropriate free list.
* called with v_interlock held.
vfree(vp)
struct vnode *vp;
{
+ funnel_t *curflock;
+ extern int disable_funnel;
+
+ if ((curflock = thread_funnel_get()) != kernel_flock &&
+ !(disable_funnel && curflock != THR_FUNNEL_NULL))
+ panic("Entering vfree() without kernel funnel");
+
/*
* if the vnode is not obtained by calling getnewvnode() we
* are not responsible for the cleanup. Just return.
/* insert at tail of LRU list or at head if VAGE is set */
simple_lock(&vnode_free_list_slock);
+ // make sure the name & parent pointers get cleared out
+// clean_up_name_parent_ptrs(vp);
+
if (VONLIST(vp))
- panic("vfree: vnode still on list");
+ panic("%s: vnode still on list", "vfree");
if (vp->v_flag & VAGE) {
TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
vinactive(vp)
struct vnode *vp;
{
+ funnel_t *curflock;
+ extern int disable_funnel;
+
+ if ((curflock = thread_funnel_get()) != kernel_flock &&
+ !(disable_funnel && curflock != THR_FUNNEL_NULL))
+ panic("Entering vinactive() without kernel funnel");
+
if (!UBCINFOEXISTS(vp))
panic("vinactive: not a UBC vnode");
simple_lock(&vnode_free_list_slock);
if (VONLIST(vp))
- panic("vinactive: vnode still on list");
+ panic("%s: vnode still on list", "vinactive");
VINACTIVECHECK("vinactive", vp, 0);
TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
{
struct proc *p = current_proc(); /* XXX */
-#if DIAGNOSTIC
- if (vp == NULL)
- panic("vput: null vp");
-#endif
simple_lock(&vp->v_interlock);
if (--vp->v_usecount == 1) {
if (UBCINFOEXISTS(vp)) {
vp->v_usecount, vp->v_writecount);
}
#endif
- if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
- VREMINACTIVE("vrele", vp);
+ simple_lock(&vnode_free_list_slock);
+ if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("vref", vp);
+ simple_unlock(&vnode_free_list_slock);
simple_unlock(&vp->v_interlock);
VOP_INACTIVE(vp, p);
struct vnode *vp;
{
struct proc *p = current_proc(); /* XXX */
+ funnel_t *curflock;
+ extern int disable_funnel;
+
+ if ((curflock = thread_funnel_get()) != kernel_flock &&
+ !(disable_funnel && curflock != THR_FUNNEL_NULL))
+ panic("Entering vrele() without kernel funnel");
-#if DIAGNOSTIC
- if (vp == NULL)
- panic("vrele: null vp");
-#endif
simple_lock(&vp->v_interlock);
if (--vp->v_usecount == 1) {
if (UBCINFOEXISTS(vp)) {
- vinactive(vp);
+ if ((vp->v_flag & VXLOCK) == 0)
+ vinactive(vp);
simple_unlock(&vp->v_interlock);
return;
}
panic("vrele: ref cnt");
}
#endif
- if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
- VREMINACTIVE("vrele", vp);
-
if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
/* vnode is being cleaned, just return */
vagevp(vp)
struct vnode *vp;
{
- assert(vp);
simple_lock(&vp->v_interlock);
vp->v_flag |= VAGE;
simple_unlock(&vp->v_interlock);
struct vnode *skipvp;
int flags;
{
- struct proc *p = current_proc(); /* XXX */
+ struct proc *p = current_proc();
struct vnode *vp, *nvp;
int busy = 0;
simple_lock(&vp->v_interlock);
/*
- * Skip over a vnodes marked VSYSTEM.
+ * Skip over vnodes marked VSYSTEM or VNOFLUSH.
*/
- if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
+ if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) {
simple_unlock(&vp->v_interlock);
continue;
}
busy++;
}
simple_unlock(&mntvnode_slock);
- if (busy)
+ if (busy && ((flags & FORCECLOSE)==0))
return (EBUSY);
return (0);
}
struct proc *p;
{
int active;
- void *obj;
- int removed = 0;
+ int didhold;
/*
* if the vnode is not obtained by calling getnewvnode() we
* so that its count cannot fall to zero and generate a
* race against ourselves to recycle it.
*/
- if (active = vp->v_usecount)
+ if (active = vp->v_usecount) {
+ /*
+ * An active vnode can not be on the free list.
+ * We are about to take an extra reference on this vnode,
+ * so do the queue management as needed.
+ * Not doing so can cause a "still on list" or
+ * "vnreclaim: v_usecount" panic if VOP_LOCK() blocks.
+ */
+ simple_lock(&vnode_free_list_slock);
+ if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("vclean", vp);
+ simple_unlock(&vnode_free_list_slock);
+
if (++vp->v_usecount <= 0)
panic("vclean: v_usecount");
+ }
+
/*
* Prevent the vnode from being recycled or
* brought into use while we clean it out.
VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
/*
+ * While blocked in VOP_LOCK() someone could have dropped
+ * reference[s] and we could land on the inactive list.
* if this vnode is on the inactive list
* take it off the list.
*/
- if ((active == 1) &&
- (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))) {
- simple_lock(&vnode_free_list_slock);
+ simple_lock(&vnode_free_list_slock);
+ if (ISSET((vp)->v_flag, VUINACTIVE))
VREMINACTIVE("vclean", vp);
- simple_unlock(&vnode_free_list_slock);
- removed++;
- }
+ simple_unlock(&vnode_free_list_slock);
+
+ /* If purging an active vnode, it must be closed before being reclaimed. */
+ if (active && (flags & DOCLOSE))
+ VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
/* Clean the pages in VM. */
- if ((active) && UBCINFOEXISTS(vp)) {
+ didhold = ubc_hold(vp);
+ if ((active) && (didhold))
(void)ubc_clean(vp, 0); /* do not invalidate */
- }
/*
* Clean out any buffers associated with the vnode.
else
vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
}
- /*
- * If purging an active vnode, it must be closed and
- * deactivated before being reclaimed. Note that the
- * VOP_INACTIVE will unlock the vnode.
- */
- if (active) {
- if (flags & DOCLOSE)
- VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
+
+ if (active)
VOP_INACTIVE(vp, p);
- } else {
- /*
- * Any other processes trying to obtain this lock must first
- * wait for VXLOCK to clear, then call the new lock operation.
- */
+ else
VOP_UNLOCK(vp, 0, p);
+
+ /* Destroy ubc named reference */
+ if (didhold) {
+ ubc_rele(vp);
+ ubc_destroy_named(vp);
+ }
+ /*
+ * Make sure vp isn't on the inactive list.
+ */
+ simple_lock(&vnode_free_list_slock);
+ if (ISSET((vp)->v_flag, VUINACTIVE)) {
+ VREMINACTIVE("vclean", vp);
}
+ simple_unlock(&vnode_free_list_slock);
+
/*
* Reclaim the vnode.
*/
if (VOP_RECLAIM(vp, p))
panic("vclean: cannot reclaim");
- if (active)
- vrele(vp);
+
+ // make sure the name & parent ptrs get cleaned out!
+ clean_up_name_parent_ptrs(vp);
+
cache_purge(vp);
if (vp->v_vnlock) {
- if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
+ struct lock__bsd__ *tmp = vp->v_vnlock;
+ if ((tmp->lk_flags & LK_DRAINED) == 0)
vprint("vclean: lock not drained", vp);
- FREE_ZONE(vp->v_vnlock, sizeof (struct lock__bsd__), M_VNODE);
vp->v_vnlock = NULL;
+ FREE(tmp, M_TEMP);
}
/* It's dead, Jim! */
vp->v_op = dead_vnodeop_p;
vp->v_tag = VT_NON;
- /*
- * v_data is reclaimed by VOP_RECLAIM, all the vnode
- * operation generated by the code below would be directed
- * to the deadfs
- */
- if (UBCINFOEXISTS(vp)) {
- /* vnode is dying, destroy the object */
- if (ubc_issetflags(vp, UI_HASOBJREF)) {
- obj = ubc_getobject(vp, UBC_NOREACTIVATE);
- if (obj == NULL)
- panic("vclean: null object");
- if (ISSET(vp->v_flag, VTERMINATE))
- panic("vclean: already teminating");
- SET(vp->v_flag, VTERMINATE);
-
- ubc_clearflags(vp, UI_HASOBJREF);
- memory_object_destroy(obj, 0);
-
- /*
- * memory_object_destroy() is asynchronous with respect
- * to vnode_pager_no_senders().
- * wait for vnode_pager_no_senders() to clear
- * VTERMINATE
- */
- while (ISSET(vp->v_flag, VTERMINATE)) {
- SET(vp->v_flag, VTERMWANT);
- tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
- }
- if (UBCINFOEXISTS(vp)) {
- ubc_info_free(vp);
- vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
- }
- }
- }
+ insmntque(vp, (struct mount *)0);
/*
* Done with purge, notify sleepers of the grim news.
vp->v_flag &= ~VXWANT;
wakeup((caddr_t)vp);
}
+
+ if (active)
+ vrele(vp);
}
/*
} */ *ap;
{
struct vnode *vp, *vq;
- struct proc *p = current_proc(); /* XXX */
+ struct proc *p = current_proc();
#if DIAGNOSTIC
if ((ap->a_flags & REVOKEALL) == 0)
while (vp->v_flag & VXLOCK) {
vp->v_flag |= VXWANT;
simple_unlock(&vp->v_interlock);
- tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
+ (void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
}
return (0);
}
vgone(vp)
struct vnode *vp;
{
- struct proc *p = current_proc(); /* XXX */
+ struct proc *p = current_proc();
simple_lock(&vp->v_interlock);
vgonel(vp, p);
while (vp->v_flag & VXLOCK) {
vp->v_flag |= VXWANT;
simple_unlock(&vp->v_interlock);
- tsleep((caddr_t)vp, PINOD, "vgone", 0);
+ (void)tsleep((caddr_t)vp, PINOD, "vgone", 0);
}
return;
}
vp->v_flag &= ~VALIASED;
}
simple_unlock(&spechash_slock);
- FREE_ZONE(vp->v_specinfo, sizeof (struct specinfo), M_VNODE);
+ {
+ struct specinfo *tmp = vp->v_specinfo;
vp->v_specinfo = NULL;
+ FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO);
+ }
}
/*
* If it is on the freelist and not already at the head,
* getnewvnode after removing it from the freelist to ensure
* that we do not try to move it here.
*/
- if (vp->v_usecount == 0) {
+ if (vp->v_usecount == 0 && (vp->v_flag & VUINACTIVE) == 0) {
simple_lock(&vnode_free_list_slock);
if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
vnode_free_list.tqh_first != vp) {
strcat(buf, "|VTEXT");
if (vp->v_flag & VSYSTEM)
strcat(buf, "|VSYSTEM");
+ if (vp->v_flag & VNOFLUSH)
+ strcat(buf, "|VNOFLUSH");
if (vp->v_flag & VXLOCK)
strcat(buf, "|VXLOCK");
if (vp->v_flag & VXWANT)
void
printlockedvnodes()
{
- struct proc *p = current_proc(); /* XXX */
+ struct proc *p = current_proc();
struct mount *mp, *nmp;
struct vnode *vp;
}
#endif
-/*
- * Top level filesystem related information gathering.
- */
-int
-vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
- int *name;
- u_int namelen;
+static int
+build_path(struct vnode *vp, char *buff, int buflen, int *outlen)
+{
+ char *end, *str;
+ int i, len, ret=0, counter=0;
+
+ end = &buff[buflen-1];
+ *--end = '\0';
+
+ while(vp && VPARENT(vp) != vp) {
+ // the maximum depth of a file system hierarchy is MAXPATHLEN/2
+ // (with single-char names separated by slashes). we panic if
+ // we've ever looped more than that.
+ if (counter++ > MAXPATHLEN/2) {
+ panic("build_path: vnode parent chain is too long! vp 0x%x\n", vp);
+ }
+ str = VNAME(vp);
+ if (VNAME(vp) == NULL) {
+ if (VPARENT(vp) != NULL) {
+ ret = EINVAL;
+ }
+ break;
+ }
+
+ // count how long the string is
+ for(len=0; *str; str++, len++)
+ /* nothing */;
+
+ // check that there's enough space
+ if ((end - buff) < len) {
+ ret = ENOSPC;
+ break;
+ }
+
+ // copy it backwards
+ for(; len > 0; len--) {
+ *--end = *--str;
+ }
+
+ // put in the path separator
+ *--end = '/';
+
+ // walk up the chain.
+ vp = VPARENT(vp);
+
+ // check if we're crossing a mount point and
+ // switch the vp if we are.
+ if (vp && (vp->v_flag & VROOT)) {
+ vp = vp->v_mount->mnt_vnodecovered;
+ }
+ }
+
+ // slide it down to the beginning of the buffer
+ memmove(buff, end, &buff[buflen] - end);
+
+ *outlen = &buff[buflen] - end;
+
+ return ret;
+}
+
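+/*
+ * Exported wrapper around build_path(); on entry *len gives the size
+ * of pathbuf and on return it holds the length of the path built.
+ */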
+__private_extern__ int
+vn_getpath(struct vnode *vp, char *pathbuf, int *len)
+{
+ return build_path(vp, pathbuf, *len, len);
+}
+
+
+
+/*
+ * Top level filesystem related information gathering.
+ */
+int
+vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
void *oldp;
size_t *oldlenp;
void *newp;
size_t newlen;
struct proc *p;
{
- struct ctldebug *cdp;
struct vfsconf *vfsp;
+ int *username;
+ u_int usernamelen;
+ int error;
- if (name[0] == VFS_NUMMNTOPS) {
+ /*
+ * The VFS_NUMMNTOPS shouldn't be at name[0] since
+ * it is a VFS generic variable. So now we must check
+ * namelen so we don't end up covering any UFS
+ * variables (since UFS vfc_typenum is 1).
+ *
+ * It should have been:
+ * name[0]: VFS_GENERIC
+ * name[1]: VFS_NUMMNTOPS
+ */
+ if (namelen == 1 && name[0] == VFS_NUMMNTOPS) {
extern unsigned int vfs_nummntops;
return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
}
/* all sysctl names at this level are at least name and field */
if (namelen < 2)
- return (ENOTDIR); /* overloaded */
+ return (EISDIR); /* overloaded */
if (name[0] != VFS_GENERIC) {
for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
if (vfsp->vfc_typenum == name[0])
return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
sizeof(struct vfsconf)));
}
- return (EOPNOTSUPP);
+ /*
+ * We need to get back into the general MIB, so we need to re-prepend
+ * CTL_VFS to our name and try userland_sysctl().
+ */
+ usernamelen = namelen + 1;
+ MALLOC(username, int *, usernamelen * sizeof(*username),
+ M_TEMP, M_WAITOK);
+ bcopy(name, username + 1, namelen * sizeof(*name));
+ username[0] = CTL_VFS;
+ error = userland_sysctl(p, username, usernamelen, oldp, oldlenp, 1,
+ newp, newlen, oldlenp);
+ FREE(username, M_TEMP);
+ return (error);
}
int kinfo_vdebug = 1;
nvp = vp->v_mntvnodes.le_next;
if (bp + VPTRSZ + VNODESZ > ewhere) {
simple_unlock(&mntvnode_slock);
+ vfs_unbusy(mp, p);
*sizep = bp - where;
return (ENOMEM);
}
simple_unlock(&mntvnode_slock);
if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
- (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
+ (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) {
+ vfs_unbusy(mp, p);
return (error);
+ }
bp += VPTRSZ + VNODESZ;
simple_lock(&mntvnode_slock);
}
* Unmount all filesystems. The list is traversed in reverse order
* of mounting to avoid dependencies.
*/
-void
+__private_extern__ void
vfs_unmountall()
{
struct mount *mp, *nmp;
- struct proc *p = current_proc(); /* XXX */
+ struct proc *p = current_proc();
/*
* Since this only runs when rebooting, it is not interlocked.
/*
* Build hash lists of net addresses and hang them off the mount point.
- * Called by ufs_mount() to set up the lists of export addresses.
+ * Called by vfs_export() to set up the lists of export addresses.
*/
static int
vfs_hang_addrlist(mp, nep, argp)
* try to reclaim vnodes from the memory
* object cache
*/
-int
+static int
vm_object_cache_reclaim(int count)
{
int cnt;
* and then try to reclaim some vnodes from the memory
* object cache
*/
-int
+static int
vnreclaim(int count)
{
- int cnt, i, loopcnt;
- void *obj;
+ int i, loopcnt;
struct vnode *vp;
int err;
struct proc *p;
for (vp = TAILQ_FIRST(&vnode_inactive_list);
(vp != NULLVP) && (i < count);
vp = TAILQ_NEXT(vp, v_freelist)) {
+
+ if (!simple_lock_try(&vp->v_interlock))
+ continue;
- if (simple_lock_try(&vp->v_interlock)) {
- if (vp->v_usecount != 1)
- panic("vnreclaim: v_usecount");
-
- if(!UBCINFOEXISTS(vp)) {
- if (vp->v_type == VBAD) {
- VREMINACTIVE("vnreclaim", vp);
- simple_unlock(&vp->v_interlock);
- continue;
- } else
- panic("non UBC vnode on inactive list");
- /* Should not reach here */
- }
+ if (vp->v_usecount != 1)
+ panic("vnreclaim: v_usecount");
- /* If vnode is already being reclaimed, wait */
- if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
- vp->v_flag |= VXWANT;
+ if(!UBCINFOEXISTS(vp)) {
+ if (vp->v_type == VBAD) {
+ VREMINACTIVE("vnreclaim", vp);
simple_unlock(&vp->v_interlock);
- simple_unlock(&vnode_free_list_slock);
- (void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
- goto restart;
- }
+ continue;
+ } else
+ panic("non UBC vnode on inactive list");
+ /* Should not reach here */
+ }
- VREMINACTIVE("vnreclaim", vp);
+ /* If vnode is already being reclaimed, wait */
+ if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
+ vp->v_flag |= VXWANT;
+ simple_unlock(&vp->v_interlock);
simple_unlock(&vnode_free_list_slock);
+ (void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
+ goto restart;
+ }
- /* held vnodes must not be reclaimed */
- if (vp->v_ubcinfo->ui_holdcnt) { /* XXX */
- vinactive(vp);
- simple_unlock(&vp->v_interlock);
- goto restart;
- }
+ /*
+ * if the vnode is being initialized,
+ * skip over it
+ */
+ if (ISSET(vp->v_flag, VUINIT)) {
+ SET(vp->v_flag, VUWANT);
+ simple_unlock(&vp->v_interlock);
+ continue;
+ }
- if (ubc_issetflags(vp, UI_WASMAPPED)) {
- /*
- * We should not reclaim as it is likely
- * to be in use. Let it die a natural death.
- * Release the UBC reference if one exists
- * and put it back at the tail.
- */
- if (ubc_issetflags(vp, UI_HASOBJREF)) {
- obj = ubc_getobject(vp, UBC_NOREACTIVATE);
- if (obj == NULL)
- panic("vnreclaim: null object");
- /* release the reference gained by ubc_info_init() */
- ubc_clearflags(vp, UI_HASOBJREF);
- simple_unlock(&vp->v_interlock);
- vm_object_deallocate(obj);
- /*
- * The vnode interlock was release.
- * vm_object_deallocate() might have blocked.
- * It is possible that the object was terminated.
- * It is also possible that the vnode was
- * reactivated. Evaluate the state again.
- */
- if (UBCINFOEXISTS(vp)) {
- simple_lock(&vp->v_interlock);
- if ((vp->v_usecount == 1) && !VONLIST(vp))
- vinactive(vp);
- simple_unlock(&vp->v_interlock);
- }
- } else {
- vinactive(vp);
- simple_unlock(&vp->v_interlock);
- }
- } else {
- VORECLAIM_ENABLE(vp);
+ VREMINACTIVE("vnreclaim", vp);
+ simple_unlock(&vnode_free_list_slock);
- /*
- * scrub the dirty pages and invalidate the buffers
- */
- p = current_proc();
- err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
- if (err) {
- /* cannot reclaim */
+ if (ubc_issetflags(vp, UI_WASMAPPED)) {
+ /*
+ * We should not reclaim as it is likely
+ * to be in use. Let it die a natural death.
+ * Release the UBC reference if one exists
+ * and put it back at the tail.
+ */
+ simple_unlock(&vp->v_interlock);
+ if (ubc_release_named(vp)) {
+ if (UBCINFOEXISTS(vp)) {
simple_lock(&vp->v_interlock);
- vinactive(vp);
- VORECLAIM_DISABLE(vp);
+ if (vp->v_usecount == 1 && !VONLIST(vp))
+ vinactive(vp);
simple_unlock(&vp->v_interlock);
- goto restart;
}
+ } else {
+ simple_lock(&vp->v_interlock);
+ vinactive(vp);
+ simple_unlock(&vp->v_interlock);
+ }
+ } else {
+ int didhold;
+
+ VORECLAIM_ENABLE(vp);
+
+ /*
+ * scrub the dirty pages and invalidate the buffers
+ */
+ p = current_proc();
+ err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
+ if (err) {
+ /* cannot reclaim */
simple_lock(&vp->v_interlock);
- if(vp->v_usecount != 1)
- panic("VOCR: usecount race");
+ vinactive(vp);
+ VORECLAIM_DISABLE(vp);
+ i++;
simple_unlock(&vp->v_interlock);
+ goto restart;
+ }
- /*
- * If the UBC reference on the memory object
- * was already lost, regain it. This will
- * keep the memory object alive for rest of the
- * reclaim and finally this reference would
- * be lost by memory_object_destroy()
- */
- obj = ubc_getobject(vp, (UBC_NOREACTIVATE|UBC_HOLDOBJECT));
- if (obj == (void *)NULL)
- panic("vnreclaim: null object");
+ /* keep the vnode alive so we can kill it */
+ simple_lock(&vp->v_interlock);
+ if(vp->v_usecount != 1)
+ panic("VOCR: usecount race");
+ vp->v_usecount++;
+ simple_unlock(&vp->v_interlock);
- /* clean up the state in VM without invalidating */
+ /* clean up the state in VM without invalidating */
+ didhold = ubc_hold(vp);
+ if (didhold)
(void)ubc_clean(vp, 0);
- /* flush and invalidate buffers associated with the vnode */
- if (vp->v_tag == VT_NFS)
- nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
- else
- vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
-
- /*
- * It is not necessary to call ubc_uncache()
- * here because memory_object_destroy() marks
- * the memory object non cachable already
- *
- * Need to release the vnode lock before calling
- * vm_object_deallocate() to avoid deadlock
- * when the vnode goes through vop_inactive
- *
- * Note: for the v_usecount == 1 case, VOP_INACTIVE
- * has not yet been called. Call it now while vp is
- * still locked, it will also release the lock.
- */
- if (vp->v_usecount == 1)
- VOP_INACTIVE(vp, p);
- else
- VOP_UNLOCK(vp, 0, p);
-
- /*
- * This vnode is ready to be reclaimed.
- * Terminate the memory object.
- * memory_object_destroy() will result in
- * vnode_pager_no_senders().
- * That will release the pager reference
- * and the vnode will move to the free list.
- */
- if (ISSET(vp->v_flag, VTERMINATE))
- panic("vnreclaim: already teminating");
- SET(vp->v_flag, VTERMINATE);
+ /* flush and invalidate buffers associated with the vnode */
+ if (vp->v_tag == VT_NFS)
+ nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
+ else
+ vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
- memory_object_destroy(obj, 0);
+ /*
+ * Note: for the v_usecount == 2 case, VOP_INACTIVE
+ * has not yet been called. Call it now while vp is
+ * still locked, it will also release the lock.
+ */
+ if (vp->v_usecount == 2)
+ VOP_INACTIVE(vp, p);
+ else
+ VOP_UNLOCK(vp, 0, p);
- /*
- * memory_object_destroy() is asynchronous with respect
- * to vnode_pager_no_senders().
- * wait for vnode_pager_no_senders() to clear
- * VTERMINATE
- */
- while (ISSET(vp->v_flag, VTERMINATE)) {
- SET(vp->v_flag, VTERMWANT);
- tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vnreclaim", 0);
- }
- simple_lock(&vp->v_interlock);
- VORECLAIM_DISABLE(vp);
- i++;
- simple_unlock(&vp->v_interlock);
+ if (didhold)
+ ubc_rele(vp);
+
+ /*
+ * destroy the ubc named reference.
+ * If we can't because it is held for I/Os
+ * in progress, just put it back on the inactive
+ * list and move on. Otherwise, the paging reference
+ * is toast (and so is this vnode?).
+ */
+ if (ubc_destroy_named(vp)) {
+ i++;
}
- /* inactive list lock was released, must restart */
- goto restart;
+ simple_lock(&vp->v_interlock);
+ VORECLAIM_DISABLE(vp);
+ simple_unlock(&vp->v_interlock);
+ vrele(vp); /* release extra use we added here */
}
+ /* inactive list lock was released, must restart */
+ goto restart;
}
simple_unlock(&vnode_free_list_slock);
* AGE the vnode so that it gets recycled quickly.
* Check lock status to decide whether to call vput() or vrele().
*/
-void
+__private_extern__ void
vnode_pager_vrele(struct vnode *vp)
{
boolean_t funnel_state;
int isvnreclaim = 1;
- if (vp == (struct vnode *) NULL)
- panic("vnode_pager_vrele: null vp");
-
funnel_state = thread_funnel_set(kernel_flock, TRUE);
/* Mark the vnode to be recycled */
}
if (!ISSET(vp->v_flag, VTERMINATE))
SET(vp->v_flag, VTERMINATE);
+
+ cache_purge(vp);
+
if (UBCINFOEXISTS(vp)) {
+ struct ubc_info *uip = vp->v_ubcinfo;
+
if (ubc_issetflags(vp, UI_WASMAPPED))
SET(vp->v_flag, VWASMAPPED);
- if ((vp->v_ubcinfo->ui_holdcnt) /* XXX */
- && !(vp->v_flag & VXLOCK))
- panic("vnode_pager_vrele: freeing held ubc_info");
-
- simple_unlock(&vp->v_interlock);
- ubc_info_free(vp);
vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
+ simple_unlock(&vp->v_interlock);
+ ubc_info_deallocate(uip);
} else {
if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
&& ((vp)->v_ubcinfo != UBC_NOINFO)) {
- simple_unlock(&vp->v_interlock);
- ubc_info_free(vp);
+ struct ubc_info *uip = vp->v_ubcinfo;
+
vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
+ simple_unlock(&vp->v_interlock);
+ ubc_info_deallocate(uip);
} else {
simple_unlock(&vp->v_interlock);
}
void
walk_allvnodes()
{
- struct proc *p = current_proc(); /* XXX */
struct mount *mp, *nmp;
struct vnode *vp;
int cnt = 0;
printf("%d - inactive\n", cnt);
}
#endif /* DIAGNOSTIC */
+
+
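+/*
+ * Extended per-mount I/O constraints, hung off mnt_xinfo_ptr when
+ * MNTK_IO_XINFO is set in mnt_kern_flag.
+ */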
+struct x_constraints {
+ u_int32_t x_maxreadcnt;
+ u_int32_t x_maxsegreadsize;
+ u_int32_t x_maxsegwritesize;
+};
+
+
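+/*
+ * Return the maximum I/O transfer size and segment count for reads or
+ * writes on this vnode's mount, falling back to MAXPHYS and 32 segments
+ * when the mount records no constraints.
+ */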
+void
+vfs_io_attributes(vp, flags, iosize, vectors)
+ struct vnode *vp;
+ int flags; /* B_READ or B_WRITE */
+ int *iosize;
+ int *vectors;
+{
+ struct mount *mp;
+
+ /* start with "reasonable" defaults */
+ *iosize = MAXPHYS;
+ *vectors = 32;
+
+ mp = vp->v_mount;
+ if (mp != NULL) {
+ switch (flags) {
+ case B_READ:
+ if (mp->mnt_kern_flag & MNTK_IO_XINFO)
+ *iosize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt;
+ else
+ *iosize = mp->mnt_maxreadcnt;
+ *vectors = mp->mnt_segreadcnt;
+ break;
+ case B_WRITE:
+ *iosize = mp->mnt_maxwritecnt;
+ *vectors = mp->mnt_segwritecnt;
+ break;
+ default:
+ break;
+ }
+ if (*iosize == 0)
+ *iosize = MAXPHYS;
+ if (*vectors == 0)
+ *vectors = 32;
+ }
+ return;
+}
+
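+/*
+ * Return the maximum size of a single I/O segment for reads or writes
+ * on this vnode's mount; without extended info the maximum transfer
+ * size is used, preserving the previous behavior.
+ */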
+__private_extern__
+void
+vfs_io_maxsegsize(vp, flags, maxsegsize)
+ struct vnode *vp;
+ int flags; /* B_READ or B_WRITE */
+ int *maxsegsize;
+{
+ struct mount *mp;
+
+ /* start with "reasonable" default */
+ *maxsegsize = MAXPHYS;
+
+ mp = vp->v_mount;
+ if (mp != NULL) {
+ switch (flags) {
+ case B_READ:
+ if (mp->mnt_kern_flag & MNTK_IO_XINFO)
+ *maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize;
+ else
+ /*
+ * if the extended info doesn't exist
+ * then use the maxread I/O size as the
+ * max segment size... this is the previous behavior
+ */
+ *maxsegsize = mp->mnt_maxreadcnt;
+ break;
+ case B_WRITE:
+ if (mp->mnt_kern_flag & MNTK_IO_XINFO)
+ *maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize;
+ else
+ /*
+ * if the extended info doesn't exist
+ * then use the maxwrite I/O size as the
+ * max segment size... this is the previous behavior
+ */
+ *maxsegsize = mp->mnt_maxwritecnt;
+ break;
+ default:
+ break;
+ }
+ if (*maxsegsize == 0)
+ *maxsegsize = MAXPHYS;
+ }
+}
+
+
+#include <sys/disk.h>
+
+
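+/*
+ * Query the underlying block device for its I/O constraints via the
+ * DKIOC* ioctls and record them in the mount structure, allocating the
+ * extended x_constraints info as needed. Also flag mounts that share
+ * a BSD unit with the root device or that live on a virtual device.
+ */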
+int
+vfs_init_io_attributes(devvp, mp)
+ struct vnode *devvp;
+ struct mount *mp;
+{
+ int error;
+ off_t readblockcnt;
+ off_t writeblockcnt;
+ off_t readmaxcnt;
+ off_t writemaxcnt;
+ off_t readsegcnt;
+ off_t writesegcnt;
+ off_t readsegsize;
+ off_t writesegsize;
+ u_long blksize;
+
+ u_int64_t temp;
+
+ struct proc *p = current_proc();
+ struct ucred *cred = p->p_ucred;
+
+ int isvirtual = 0;
+ /*
+ * determine if this mount point exists on the same device as the root
+ * partition... if so, then it comes under the hard throttle control
+ */
+ int thisunit = -1;
+ static int rootunit = -1;
+ extern struct vnode *rootvp;
+
+ if (rootunit == -1) {
+ if (VOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, cred, p))
+ rootunit = -1;
+ else if (rootvp == devvp)
+ mp->mnt_kern_flag |= MNTK_ROOTDEV;
+ }
+ if (devvp != rootvp && rootunit != -1) {
+ if (VOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, cred, p) == 0) {
+ if (thisunit == rootunit)
+ mp->mnt_kern_flag |= MNTK_ROOTDEV;
+ }
+ }
+ if (VOP_IOCTL(devvp, DKIOCGETISVIRTUAL, (caddr_t)&isvirtual, 0, cred, p) == 0) {
+ if (isvirtual)
+ mp->mnt_kern_flag |= MNTK_VIRTUALDEV;
+ }
+
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
+ (caddr_t)&readblockcnt, 0, cred, p)))
+ return (error);
+
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
+ (caddr_t)&writeblockcnt, 0, cred, p)))
+ return (error);
+
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD,
+ (caddr_t)&readmaxcnt, 0, cred, p)))
+ return (error);
+
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE,
+ (caddr_t)&writemaxcnt, 0, cred, p)))
+ return (error);
+
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
+ (caddr_t)&readsegcnt, 0, cred, p)))
+ return (error);
+
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
+ (caddr_t)&writesegcnt, 0, cred, p)))
+ return (error);
+
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD,
+ (caddr_t)&readsegsize, 0, cred, p)))
+ return (error);
+
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE,
+ (caddr_t)&writesegsize, 0, cred, p)))
+ return (error);
+
+ if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
+ (caddr_t)&blksize, 0, cred, p)))
+ return (error);
+
+
+ if ( !(mp->mnt_kern_flag & MNTK_IO_XINFO)) {
+ MALLOC(mp->mnt_xinfo_ptr, void *, sizeof(struct x_constraints), M_TEMP, M_WAITOK);
+ mp->mnt_kern_flag |= MNTK_IO_XINFO;
+ }
+
+ if (readmaxcnt)
+ temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt;
+ else {
+ if (readblockcnt) {
+ temp = readblockcnt * blksize;
+ temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
+ } else
+ temp = MAXPHYS;
+ }
+ ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt = (u_int32_t)temp;
+
+ if (writemaxcnt)
+ temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt;
+ else {
+ if (writeblockcnt) {
+ temp = writeblockcnt * blksize;
+ temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
+ } else
+ temp = MAXPHYS;
+ }
+ mp->mnt_maxwritecnt = (u_int32_t)temp;
+
+ if (readsegcnt) {
+ temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
+ mp->mnt_segreadcnt = (u_int16_t)temp;
+ }
+ if (writesegcnt) {
+ temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
+ mp->mnt_segwritecnt = (u_int16_t)temp;
+ }
+ if (readsegsize)
+ temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize;
+ else
+ temp = mp->mnt_maxreadcnt;
+ ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize = (u_int32_t)temp;
+
+ if (writesegsize)
+ temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize;
+ else
+ temp = mp->mnt_maxwritecnt;
+ ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize = (u_int32_t)temp;
+
+ return (error);
+}
+
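+/*
+ * fs_klist collects the knotes registered for filesystem events;
+ * vfs_event_signal() posts an event hint to all of them (the fsid and
+ * data arguments are currently unused).
+ */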
+static struct klist fs_klist;
+
+void
+vfs_event_init(void)
+{
+
+ klist_init(&fs_klist);
+}
+
+void
+vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data)
+{
+
+ KNOTE(&fs_klist, event);
+}
+
+/*
+ * return the number of mounted filesystems.
+ */
+static int
+sysctl_vfs_getvfscnt(void)
+{
+ struct mount *mp;
+ int ret = 0;
+
+ simple_lock(&mountlist_slock);
+ CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
+ ret++;
+ simple_unlock(&mountlist_slock);
+ return (ret);
+}
+
+/*
+ * Fill in the array of fsid_t's up to a max of 'count'; the actual
+ * number filled in is returned in '*actual'. If there are more fsid_t's
+ * than room in fsidlst then ENOMEM is returned and '*actual' holds
+ * the full count.
+ * Callers depend on '*actual' being filled out even in the error case.
+ */
+static int
+sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual)
+{
+ struct mount *mp;
+
+ *actual = 0;
+ simple_lock(&mountlist_slock);
+ CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
+ (*actual)++;
+ if (*actual <= count)
+ fsidlst[(*actual) - 1] = mp->mnt_stat.f_fsid;
+ }
+ simple_unlock(&mountlist_slock);
+ return (*actual <= count ? 0 : ENOMEM);
+}
+
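+/*
+ * Sysctl handler that copies out the fsid of every mounted filesystem,
+ * retrying if a new mount appears while the copyout buffer is allocated.
+ */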
+static int
+sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS
+{
+ int actual, error;
+ size_t space;
+ fsid_t *fsidlst;
+
+ /* This is a readonly node. */
+ if (req->newptr != NULL)
+ return (EPERM);
+
+ /* they are querying us so just return the space required. */
+ if (req->oldptr == NULL) {
+ req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
+ return 0;
+ }
+again:
+ /*
+ * Retrieve an accurate count of the amount of space required to copy
+ * out all the fsids in the system.
+ */
+ space = req->oldlen;
+ req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
+
+ /* they didn't give us enough space. */
+ if (space < req->oldlen)
+ return (ENOMEM);
+
+ MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK);
+ error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t),
+ &actual);
+ /*
+ * If we get back ENOMEM, then another mount has been added while we
+ * slept in malloc above. If this is the case then try again.
+ */
+ if (error == ENOMEM) {
+ FREE(fsidlst, M_TEMP);
+ req->oldlen = space;
+ goto again;
+ }
+ if (error == 0) {
+ error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t));
+ }
+ FREE(fsidlst, M_TEMP);
+ return (error);
+}
+
+/*
+ * Do a sysctl by fsid.
+ */
+static int
+sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS
+{
+ struct vfsidctl vc;
+ struct mount *mp;
+ struct statfs *sp;
+ struct proc *p;
+ int *name;
+ int error, flags, namelen;
+
+ name = arg1;
+ namelen = arg2;
+ p = req->p;
+
+ error = SYSCTL_IN(req, &vc, sizeof(vc));
+ if (error)
+ return (error);
+ if (vc.vc_vers != VFS_CTL_VERS1)
+ return (EINVAL);
+ mp = vfs_getvfs(&vc.vc_fsid);
+ if (mp == NULL)
+ return (ENOENT);
+ /* reset so that the fs specific code can fetch it. */
+ req->newidx = 0;
+ /*
+ * Note if this is a VFS_CTL then we pass the actual sysctl req
+ * in for "oldp" so that the lower layer can DTRT and use the
+ * SYSCTL_IN/OUT routines.
+ */
+ if (mp->mnt_op->vfs_sysctl != NULL) {
+ error = mp->mnt_op->vfs_sysctl(name, namelen,
+ req, NULL, NULL, 0, req->p);
+ if (error != EOPNOTSUPP)
+ return (error);
+ }
+ switch (name[0]) {
+ case VFS_CTL_UMOUNT:
+ VCTLTOREQ(&vc, req);
+ error = SYSCTL_IN(req, &flags, sizeof(flags));
+ if (error)
+ break;
+ error = safedounmount(mp, flags, p);
+ break;
+ case VFS_CTL_STATFS:
+ VCTLTOREQ(&vc, req);
+ error = SYSCTL_IN(req, &flags, sizeof(flags));
+ if (error)
+ break;
+ sp = &mp->mnt_stat;
+ if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) &&
+ (error = VFS_STATFS(mp, sp, p)))
+ return (error);
+ sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ error = SYSCTL_OUT(req, sp, sizeof(*sp));
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (error);
+}
+
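+/*
+ * kqueue filter for filesystem events: knotes attach to the global
+ * fs_klist and accumulate event hints in kn_fflags.
+ */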
+static int filt_fsattach(struct knote *kn);
+static void filt_fsdetach(struct knote *kn);
+static int filt_fsevent(struct knote *kn, long hint);
+
+struct filterops fs_filtops =
+ { 0, filt_fsattach, filt_fsdetach, filt_fsevent };
+
+static int
+filt_fsattach(struct knote *kn)
+{
+
+ kn->kn_flags |= EV_CLEAR;
+ KNOTE_ATTACH(&fs_klist, kn);
+ return (0);
+}
+
+static void
+filt_fsdetach(struct knote *kn)
+{
+
+ KNOTE_DETACH(&fs_klist, kn);
+}
+
+static int
+filt_fsevent(struct knote *kn, long hint)
+{
+
+ kn->kn_fflags |= hint;
+ return (kn->kn_fflags != 0);
+}
+
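+/*
+ * Sysctl handler to read or set P_NOREMOTEHANG on a process:
+ * a negative pid clears the flag, a positive pid sets it.
+ */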
+static int
+sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS
+{
+ int out, error;
+ pid_t pid;
+ size_t space;
+ struct proc *p;
+
+ /* We need a pid. */
+ if (req->newptr == NULL)
+ return (EINVAL);
+
+ error = SYSCTL_IN(req, &pid, sizeof(pid));
+ if (error)
+ return (error);
+
+ p = pfind(pid < 0 ? -pid : pid);
+ if (p == NULL)
+ return (ESRCH);
+
+ /*
+ * Fetching the value is ok, but we only fetch if the old
+ * pointer is given.
+ */
+ if (req->oldptr != NULL) {
+ out = !((p->p_flag & P_NOREMOTEHANG) == 0);
+ error = SYSCTL_OUT(req, &out, sizeof(out));
+ return (error);
+ }
+
+ /* cansignal offers us enough security. */
+ if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0)
+ return (EPERM);
+
+ if (pid < 0)
+ p->p_flag &= ~P_NOREMOTEHANG;
+ else
+ p->p_flag |= P_NOREMOTEHANG;
+
+ return (0);
+}
+/* the vfs.generic. branch. */
+SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge");
+/* retrieve a list of mounted filesystem fsid_t */
+SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD,
+ 0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids");
+/* perform operations on filesystem via fsid_t */
+SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW,
+ sysctl_vfs_ctlbyfsid, "ctlbyfsid");
+SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW,
+ 0, 0, sysctl_vfs_noremotehang, "I", "noremotehang");
+