X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/1c79356b52d46aa6b508fb032f5ae709b1f2897b..e5568f75972dfc723778653c11cb6b4dc825716a:/bsd/vfs/vfs_subr.c?ds=inline diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index 216c1a0e4..0801f1a09 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -64,6 +64,7 @@ * External virtual filesystem routines */ +#undef DIAGNOSTIC #define DIAGNOSTIC 1 #include @@ -84,11 +85,21 @@ #include #include #include +#include +#include + +#include +#include + #include #include +#include +#include + + enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, @@ -100,19 +111,10 @@ int vttoif_tab[9] = { static void vfree(struct vnode *vp); static void vinactive(struct vnode *vp); -extern int vnreclaim(int count); +static int vnreclaim(int count); extern kern_return_t adjust_vm_object_cache(vm_size_t oval, vm_size_t nval); -/* - * Insq/Remq for the vnode usage lists. - */ -#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) -#define bufremvn(bp) { \ - LIST_REMOVE(bp, b_vnbufs); \ - (bp)->b_vnbufs.le_next = NOLIST; \ -} - TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */ struct mntlist mountlist; /* mounted filesystem list */ @@ -167,7 +169,7 @@ struct mntlist mountlist; /* mounted filesystem list */ #define VORECLAIM_ENABLE(vp) \ do { \ if (ISSET((vp)->v_flag, VORECLAIM)) \ - panic("vm object raclaim already"); \ + panic("vm_object_reclaim already"); \ SET((vp)->v_flag, VORECLAIM); \ } while(0) @@ -207,7 +209,7 @@ unsigned long vnodetarget; /* target for vnreclaim() */ * cache. Having too few vnodes on the free list causes serious disk * thrashing as we cycle through them. */ -#define VNODE_FREE_MIN 100 /* freelist should have at least these many */ +#define VNODE_FREE_MIN 300 /* freelist should have at least these many */ /* * We need to get vnodes back from the VM object cache when a certain # @@ -226,7 +228,7 @@ unsigned long vnodetarget; /* target for vnreclaim() */ /* * Initialize the vnode management data structures. 
*/ -void +__private_extern__ void vntblinit() { extern struct lock__bsd__ exchangelock; @@ -252,11 +254,16 @@ vntblinit() } /* Reset the VM Object Cache with the values passed in */ -kern_return_t +__private_extern__ kern_return_t reset_vmobjectcache(unsigned int val1, unsigned int val2) { vm_size_t oval = val1 - VNODE_FREE_MIN; - vm_size_t nval = val2 - VNODE_FREE_MIN; + vm_size_t nval; + + if(val2 < VNODE_FREE_MIN) + nval = 0; + else + nval = val2 - VNODE_FREE_MIN; return(adjust_vm_object_cache(oval, nval)); } @@ -334,6 +341,11 @@ vfs_rootmountalloc(fstypename, devname, mpp) return (ENODEV); mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); + + /* Initialize the default IO constraints */ + mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; + mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); (void)vfs_busy(mp, LK_NOWAIT, 0, p); LIST_INIT(&mp->mnt_vnodelist); @@ -390,8 +402,7 @@ vfs_getvfs(fsid) register struct mount *mp; simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; - mp = mp->mnt_list.cqe_next) { + CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { simple_unlock(&mountlist_slock); @@ -422,7 +433,7 @@ static u_short xxxfs_mntid; ++xxxfs_mntid; tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; - if (mountlist.cqh_first != (void *)&mountlist) { + if (!CIRCLEQ_EMPTY(&mountlist)) { while (vfs_getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; @@ -532,8 +543,8 @@ retry: simple_unlock(&vp->v_interlock); reclaimhits++; } else - break; - } + break; + } } /* @@ -582,15 +593,37 @@ retry: else vp->v_ubcinfo = 0; + if (vp->v_flag & VHASDIRTY) + cluster_release(vp); + + // make sure all these fields are cleared out as the + // name/parent stuff uses them and assumes they're + // cleared to null/0. + if (vp->v_scmap != NULL) { + panic("getnewvnode: vp @ 0x%x has non-null scmap.\n", vp); + } + vp->v_un.vu_name = NULL; + vp->v_scdirty = 0; + vp->v_un1.v_cl.v_pad = 0; + + vp->v_lastr = -1; vp->v_ralen = 0; vp->v_maxra = 0; - vp->v_lastw = 0; vp->v_ciosiz = 0; - vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; + /* we may have blocked, re-evaluate state */ + simple_lock(&vnode_free_list_slock); + if (VONLIST(vp)) { + if (vp->v_usecount == 0) + VREMFREE("getnewvnode", vp); + else if (ISSET((vp)->v_flag, VUINACTIVE)) + VREMINACTIVE("getnewvnode", vp); + } + simple_unlock(&vnode_free_list_slock); + done: vp->v_flag = VSTANDARD; vp->v_type = VNON; @@ -626,6 +659,20 @@ insmntque(vp, mp) simple_unlock(&mntvnode_slock); } +__inline void +vpwakeup(struct vnode *vp) +{ + if (vp) { + if (--vp->v_numoutput < 0) + panic("vpwakeup: neg numoutput"); + if ((vp->v_flag & VBWAIT || vp->v_flag & VTHROTTLED) + && vp->v_numoutput <= 0) { + vp->v_flag &= ~(VBWAIT|VTHROTTLED); + wakeup((caddr_t)&vp->v_numoutput); + } + } +} + /* * Update outstanding I/O count and do wakeup if requested. 
*/ @@ -633,19 +680,8 @@ void vwakeup(bp) register struct buf *bp; { - register struct vnode *vp; - CLR(bp->b_flags, B_WRITEINPROG); - if (vp = bp->b_vp) { - if (--vp->v_numoutput < 0) - panic("vwakeup: neg numoutput"); - if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { - if (vp->v_numoutput < 0) - panic("vwakeup: neg numoutput 2"); - vp->v_flag &= ~VBWAIT; - wakeup((caddr_t)&vp->v_numoutput); - } - } + vpwakeup(bp->b_vp); } /* @@ -668,12 +704,12 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) { return (error); } - if (vp->v_dirtyblkhd.lh_first != NULL || (vp->v_flag & VHASDIRTY)) - panic("vinvalbuf: dirty bufs"); + if (vp->v_dirtyblkhd.lh_first) + panic("vinvalbuf: dirty bufs (vp 0x%x, bp 0x%x)", vp, vp->v_dirtyblkhd.lh_first); } for (;;) { - if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA) + if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && @@ -685,7 +721,7 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) for (bp = blist; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; - if (flags & V_SAVEMETA && bp->b_lblkno < 0) + if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) continue; s = splbio(); if (ISSET(bp->b_flags, B_BUSY)) { @@ -711,7 +747,13 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) (void) VOP_BWRITE(bp); break; } - SET(bp->b_flags, B_INVAL); + + if (bp->b_flags & B_LOCKED) { + panic("vinvalbuf: bp @ 0x%x is locked!", bp); + break; + } else { + SET(bp->b_flags, B_INVAL); + } brelse(bp); } } @@ -721,82 +763,6 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) return (0); } -/* - * Associate a buffer with a vnode. - */ -void -bgetvp(vp, bp) - register struct vnode *vp; - register struct buf *bp; -{ - - if (bp->b_vp) - panic("bgetvp: not free"); - VHOLD(vp); - bp->b_vp = vp; - if (vp->v_type == VBLK || vp->v_type == VCHR) - bp->b_dev = vp->v_rdev; - else - bp->b_dev = NODEV; - /* - * Insert onto list for new vnode. - */ - bufinsvn(bp, &vp->v_cleanblkhd); -} - -/* - * Disassociate a buffer from a vnode. - */ -void -brelvp(bp) - register struct buf *bp; -{ - struct vnode *vp; - - if (bp->b_vp == (struct vnode *) 0) - panic("brelvp: NULL"); - /* - * Delete from old vnode list, if on one. - */ - if (bp->b_vnbufs.le_next != NOLIST) - bufremvn(bp); - vp = bp->b_vp; - bp->b_vp = (struct vnode *) 0; - HOLDRELE(vp); -} - -/* - * Reassign a buffer from one vnode to another. - * Used to assign file specific control information - * (indirect blocks) to the vnode to which they belong. - */ -void -reassignbuf(bp, newvp) - register struct buf *bp; - register struct vnode *newvp; -{ - register struct buflists *listheadp; - - if (newvp == NULL) { - printf("reassignbuf: NULL"); - return; - } - /* - * Delete from old vnode list, if on one. - */ - if (bp->b_vnbufs.le_next != NOLIST) - bufremvn(bp); - /* - * If dirty, put on list of dirty buffers; - * otherwise insert onto list of clean buffers. - */ - if (ISSET(bp->b_flags, B_DELWRI)) - listheadp = &newvp->v_dirtyblkhd; - else - listheadp = &newvp->v_cleanblkhd; - bufinsvn(bp, listheadp); -} - /* * Create a vnode for a block device. * Used for root filesystem, argdev, and swap areas. 
@@ -847,14 +813,13 @@ checkalias(nvp, nvp_rdev, mp) struct proc *p = current_proc(); /* XXX */ struct vnode *vp; struct vnode **vpp; - struct specinfo * bufhold; - int buffree = 1; + struct specinfo *specinfop; if (nvp->v_type != VBLK && nvp->v_type != VCHR) return (NULLVP); - bufhold = (struct specinfo *)_MALLOC_ZONE(sizeof(struct specinfo), - M_VNODE, M_WAITOK); + MALLOC_ZONE(specinfop, struct specinfo *, sizeof(struct specinfo), + M_SPECINFO, M_WAITOK); vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: simple_lock(&spechash_slock); @@ -877,8 +842,8 @@ loop: break; } if (vp == NULL || vp->v_tag != VT_NON) { - nvp->v_specinfo = bufhold; - buffree = 0; /* buffer used */ + nvp->v_specinfo = specinfop; + specinfop = 0; /* buffer used */ bzero(nvp->v_specinfo, sizeof(struct specinfo)); nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; @@ -902,18 +867,20 @@ loop: vp->v_tag = nvp->v_tag; nvp->v_type = VNON; insmntque(vp, mp); - if (buffree) - _FREE_ZONE((void *)bufhold, sizeof (struct specinfo), M_VNODE); + if (specinfop) + FREE_ZONE((void *)specinfop, sizeof(struct specinfo), M_SPECINFO); return (vp); } /* - * Grab a particular vnode from the free list, increment its - * reference count and lock it. The vnode lock bit is set the - * vnode is being eliminated in vgone. The process is awakened - * when the transition is completed, and an error returned to - * indicate that the vnode is no longer usable (possibly having - * been changed to a new file system type). + * Get a reference on a particular vnode and lock it if requested. + * If the vnode was on the inactive list, remove it from the list. + * If the vnode was on the free list, remove it from the list and + * move it to inactive list as needed. + * The vnode lock bit is set if the vnode is being eliminated in + * vgone. The process is awakened when the transition is completed, + * and an error returned to indicate that the vnode is no longer + * usable (possibly having been changed to a new file system type). 
*/ int vget(vp, flags, p) @@ -922,6 +889,11 @@ vget(vp, flags, p) struct proc *p; { int error = 0; + u_long vpid; + + vpid = vp->v_id; // save off the original v_id + +retry: /* * If the vnode is in the process of being cleaned out for @@ -934,7 +906,7 @@ vget(vp, flags, p) if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); - tsleep((caddr_t)vp, PINOD, "vget", 0); + (void)tsleep((caddr_t)vp, PINOD, "vget", 0); return (ENOENT); } @@ -945,95 +917,142 @@ vget(vp, flags, p) if (ISSET(vp->v_flag, VTERMINATE)) { SET(vp->v_flag, VTERMWANT); simple_unlock(&vp->v_interlock); - tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0); + (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vget1", 0); return (ENOENT); } + /* + * if the vnode is being initialized, + * wait for it to finish initialization + */ + if (ISSET(vp->v_flag, VUINIT)) { + SET(vp->v_flag, VUWANT); + simple_unlock(&vp->v_interlock); + (void) tsleep((caddr_t)vp, PINOD, "vget2", 0); + goto retry; + } + simple_lock(&vnode_free_list_slock); - /* If on the free list, remove it from there */ - if (vp->v_usecount == 0) { - if (VONLIST(vp)) + if (VONLIST(vp)) { + if (vp->v_usecount == 0) VREMFREE("vget", vp); - } else { - /* If on the inactive list, remove it from there */ - if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) { - if (VONLIST(vp)) - VREMINACTIVE("vget", vp); - } + else if (ISSET((vp)->v_flag, VUINACTIVE)) + VREMINACTIVE("vget", vp); } - - /* The vnode should not be on the inactive list here */ - VINACTIVECHECK("vget", vp, 0); - simple_unlock(&vnode_free_list_slock); + if (++vp->v_usecount <= 0) panic("vget: v_usecount"); + /* + * Recover named reference as needed + */ + if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) { + simple_unlock(&vp->v_interlock); + if (ubc_getobject(vp, UBC_HOLDOBJECT) == MEMORY_OBJECT_CONTROL_NULL) { + error = ENOENT; + goto errout; + } + simple_lock(&vp->v_interlock); + } + if (flags & LK_TYPE_MASK) { - if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) { - /* - * If the vnode was not active in the first place - * must not call vrele() as VOP_INACTIVE() is not - * required. - * So inlined part of vrele() here. - */ - simple_lock(&vp->v_interlock); - if (--vp->v_usecount == 1) { - if (UBCINFOEXISTS(vp)) { - vinactive(vp); - simple_unlock(&vp->v_interlock); - return (error); - } - } - if (vp->v_usecount > 0) { - simple_unlock(&vp->v_interlock); - return (error); - } - if (vp->v_usecount < 0) - panic("vget: negative usecount (%d)", vp->v_usecount); - vfree(vp); - simple_unlock(&vp->v_interlock); + if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) + goto errout; + if (vpid != vp->v_id) { // make sure it's still the same vnode + vput(vp); + return ENOENT; } - return (error); + return (0); + } + + if ((flags & LK_INTERLOCK) == 0) + simple_unlock(&vp->v_interlock); + + if (vpid != vp->v_id) { // make sure it's still the same vnode + vrele(vp); + return ENOENT; } + return (0); + +errout: + simple_lock(&vp->v_interlock); + /* - * If this is a valid UBC vnode, if usecount is 1 and if - * this vnode was mapped in the past, it is likely - * that ubc_info freed due to the memory object getting recycled. - * Just re-initialize the ubc_info. + * we may have blocked. Re-evaluate the state */ - if ((vp->v_usecount == 1) && UBCISVALID(vp)) { - if (UBCINFOMISSING(vp)) - panic("vget: lost ubc_info"); - - if (ISSET(vp->v_flag, VTERMINATE)) { - /* - * vnode is being terminated. 
- * wait for vnode_pager_no_senders() to clear - * VTERMINATE - */ - SET(vp->v_flag, VTERMWANT); + simple_lock(&vnode_free_list_slock); + if (VONLIST(vp)) { + if (vp->v_usecount == 0) + VREMFREE("vget", vp); + else if (ISSET((vp)->v_flag, VUINACTIVE)) + VREMINACTIVE("vget", vp); + } + simple_unlock(&vnode_free_list_slock); + + /* + * If the vnode was not active in the first place + * must not call vrele() as VOP_INACTIVE() is not + * required. + * So inlined part of vrele() here. + */ + if (--vp->v_usecount == 1) { + if (UBCINFOEXISTS(vp)) { + vinactive(vp); simple_unlock(&vp->v_interlock); - tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0); - /* return error */ - return (ENOENT); + return (error); } + } + if (vp->v_usecount > 0) { + simple_unlock(&vp->v_interlock); + return (error); + } + if (vp->v_usecount < 0) + panic("vget: negative usecount (%d)", vp->v_usecount); + vfree(vp); + simple_unlock(&vp->v_interlock); + return (error); +} - if ((!UBCINFOEXISTS(vp)) && ISSET(vp->v_flag, VWASMAPPED)) { - simple_unlock(&vp->v_interlock); - ubc_info_init(vp); - simple_lock(&vp->v_interlock); - } else - panic("vget: stolen ubc_info"); +/* + * Get a pager reference on the particular vnode. + * + * This is called from ubc_info_init() and it is asumed that + * the vnode is not on the free list. + * It is also assumed that the vnode is neither being recycled + * by vgonel nor being terminated by vnode_pager_vrele(). + * + * The vnode interlock is NOT held by the caller. + */ +__private_extern__ int +vnode_pager_vget(vp) + struct vnode *vp; +{ + simple_lock(&vp->v_interlock); + + UBCINFOCHECK("vnode_pager_vget", vp); - if (!ubc_issetflags(vp, UI_HASOBJREF)) - if (ubc_getobject(vp, (UBC_NOREACTIVATE|UBC_HOLDOBJECT))) - panic("vget: null object"); + if (ISSET(vp->v_flag, (VXLOCK|VORECLAIM|VTERMINATE))) + panic("%s: dying vnode", "vnode_pager_vget"); + + simple_lock(&vnode_free_list_slock); + /* The vnode should not be on free list */ + if (VONLIST(vp)) { + if (vp->v_usecount == 0) + panic("%s: still on list", "vnode_pager_vget"); + else if (ISSET((vp)->v_flag, VUINACTIVE)) + VREMINACTIVE("vnode_pager_vget", vp); } -out: - if ((flags & LK_INTERLOCK) == 0) - simple_unlock(&vp->v_interlock); + + /* The vnode should not be on the inactive list here */ + simple_unlock(&vnode_free_list_slock); + + /* After all those checks, now do the real work :-) */ + if (++vp->v_usecount <= 0) + panic("vnode_pager_vget: v_usecount"); + simple_unlock(&vp->v_interlock); + return (0); } @@ -1072,8 +1091,8 @@ vop_nolock(ap) if (vp->v_vnlock == NULL) { if ((flags & LK_TYPE_MASK) == LK_DRAIN) return (0); - MALLOC_ZONE(vp->v_vnlock, struct lock__bsd__ *, - sizeof(struct lock__bsd__), M_VNODE, M_WAITOK); + MALLOC(vp->v_vnlock, struct lock__bsd__ *, + sizeof(struct lock__bsd__), M_TEMP, M_WAITOK); lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); } switch (flags & LK_TYPE_MASK) { @@ -1153,21 +1172,41 @@ vref(vp) panic("vref used where vget required"); /* If on the inactive list, remove it from there */ - if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) { - if (VONLIST(vp)) { - simple_lock(&vnode_free_list_slock); - VREMINACTIVE("vref", vp); - simple_unlock(&vnode_free_list_slock); - } - } - /* The vnode should not be on the inactive list here */ - VINACTIVECHECK("vref", vp, 0); + simple_lock(&vnode_free_list_slock); + if (ISSET((vp)->v_flag, VUINACTIVE)) + VREMINACTIVE("vref", vp); + simple_unlock(&vnode_free_list_slock); if (++vp->v_usecount <= 0) panic("vref v_usecount"); simple_unlock(&vp->v_interlock); } +static void 
+clean_up_name_parent_ptrs(struct vnode *vp) +{ + if (VNAME(vp) || VPARENT(vp)) { + char *tmp1; + struct vnode *tmp2; + + // do it this way so we don't block before clearing + // these fields. + tmp1 = VNAME(vp); + tmp2 = VPARENT(vp); + VNAME(vp) = NULL; + VPARENT(vp) = NULL; + + if (tmp1) { + remove_name(tmp1); + } + + if (tmp2) { + vrele(tmp2); + } + } +} + + /* * put the vnode on appropriate free list. * called with v_interlock held. @@ -1176,6 +1215,13 @@ static void vfree(vp) struct vnode *vp; { + funnel_t *curflock; + extern int disable_funnel; + + if ((curflock = thread_funnel_get()) != kernel_flock && + !(disable_funnel && curflock != THR_FUNNEL_NULL)) + panic("Entering vfree() without kernel funnel"); + /* * if the vnode is not obtained by calling getnewvnode() we * are not responsible for the cleanup. Just return. @@ -1190,8 +1236,11 @@ vfree(vp) /* insert at tail of LRU list or at head if VAGE is set */ simple_lock(&vnode_free_list_slock); + // make sure the name & parent pointers get cleared out +// clean_up_name_parent_ptrs(vp); + if (VONLIST(vp)) - panic("vfree: vnode still on list"); + panic("%s: vnode still on list", "vfree"); if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); @@ -1211,6 +1260,13 @@ static void vinactive(vp) struct vnode *vp; { + funnel_t *curflock; + extern int disable_funnel; + + if ((curflock = thread_funnel_get()) != kernel_flock && + !(disable_funnel && curflock != THR_FUNNEL_NULL)) + panic("Entering vinactive() without kernel funnel"); + if (!UBCINFOEXISTS(vp)) panic("vinactive: not a UBC vnode"); @@ -1220,7 +1276,7 @@ vinactive(vp) simple_lock(&vnode_free_list_slock); if (VONLIST(vp)) - panic("vinactive: vnode still on list"); + panic("%s: vnode still on list", "vinactive"); VINACTIVECHECK("vinactive", vp, 0); TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist); @@ -1242,10 +1298,6 @@ vput(vp) { struct proc *p = current_proc(); /* XXX */ -#if DIAGNOSTIC - if (vp == NULL) - panic("vput: null vp"); -#endif simple_lock(&vp->v_interlock); if (--vp->v_usecount == 1) { if (UBCINFOEXISTS(vp)) { @@ -1267,8 +1319,10 @@ vput(vp) vp->v_usecount, vp->v_writecount); } #endif - if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp)) - VREMINACTIVE("vrele", vp); + simple_lock(&vnode_free_list_slock); + if (ISSET((vp)->v_flag, VUINACTIVE)) + VREMINACTIVE("vref", vp); + simple_unlock(&vnode_free_list_slock); simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); @@ -1297,15 +1351,18 @@ vrele(vp) struct vnode *vp; { struct proc *p = current_proc(); /* XXX */ + funnel_t *curflock; + extern int disable_funnel; + + if ((curflock = thread_funnel_get()) != kernel_flock && + !(disable_funnel && curflock != THR_FUNNEL_NULL)) + panic("Entering vrele() without kernel funnel"); -#if DIAGNOSTIC - if (vp == NULL) - panic("vrele: null vp"); -#endif simple_lock(&vp->v_interlock); if (--vp->v_usecount == 1) { if (UBCINFOEXISTS(vp)) { - vinactive(vp); + if ((vp->v_flag & VXLOCK) == 0) + vinactive(vp); simple_unlock(&vp->v_interlock); return; } @@ -1320,9 +1377,6 @@ vrele(vp) panic("vrele: ref cnt"); } #endif - if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp)) - VREMINACTIVE("vrele", vp); - if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) { /* vnode is being cleaned, just return */ @@ -1361,7 +1415,6 @@ void vagevp(vp) struct vnode *vp; { - assert(vp); simple_lock(&vp->v_interlock); vp->v_flag |= VAGE; simple_unlock(&vp->v_interlock); @@ -1417,7 +1470,7 @@ vflush(mp, skipvp, flags) struct vnode *skipvp; int flags; { - struct proc *p = current_proc(); 
/* XXX */ + struct proc *p = current_proc(); struct vnode *vp, *nvp; int busy = 0; @@ -1435,9 +1488,9 @@ loop: simple_lock(&vp->v_interlock); /* - * Skip over a vnodes marked VSYSTEM. + * Skip over a vnodes marked VSYSTEM or VNOFLUSH. */ - if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { + if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) { simple_unlock(&vp->v_interlock); continue; } @@ -1492,7 +1545,7 @@ loop: busy++; } simple_unlock(&mntvnode_slock); - if (busy) + if (busy && ((flags & FORCECLOSE)==0)) return (EBUSY); return (0); } @@ -1508,8 +1561,7 @@ vclean(vp, flags, p) struct proc *p; { int active; - void *obj; - int removed = 0; + int didhold; /* * if the vnode is not obtained by calling getnewvnode() we @@ -1526,9 +1578,23 @@ vclean(vp, flags, p) * so that its count cannot fall to zero and generate a * race against ourselves to recycle it. */ - if (active = vp->v_usecount) + if (active = vp->v_usecount) { + /* + * active vnode can not be on the free list. + * we are about to take an extra reference on this vnode + * do the queue management as needed + * Not doing so can cause "still on list" or + * "vnreclaim: v_usecount" panic if VOP_LOCK() blocks. + */ + simple_lock(&vnode_free_list_slock); + if (ISSET((vp)->v_flag, VUINACTIVE)) + VREMINACTIVE("vclean", vp); + simple_unlock(&vnode_free_list_slock); + if (++vp->v_usecount <= 0) panic("vclean: v_usecount"); + } + /* * Prevent the vnode from being recycled or * brought into use while we clean it out. @@ -1547,21 +1613,24 @@ vclean(vp, flags, p) VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* + * While blocked in VOP_LOCK() someone could have dropped + * reference[s] and we could land on the inactive list. * if this vnode is on the inactive list * take it off the list. */ - if ((active == 1) && - (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))) { - simple_lock(&vnode_free_list_slock); + simple_lock(&vnode_free_list_slock); + if (ISSET((vp)->v_flag, VUINACTIVE)) VREMINACTIVE("vclean", vp); - simple_unlock(&vnode_free_list_slock); - removed++; - } + simple_unlock(&vnode_free_list_slock); + + /* Clean the pages in VM. */ + if (active && (flags & DOCLOSE)) + VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); /* Clean the pages in VM. */ - if ((active) && UBCINFOEXISTS(vp)) { + didhold = ubc_hold(vp); + if ((active) && (didhold)) (void)ubc_clean(vp, 0); /* do not invalidate */ - } /* * Clean out any buffers associated with the vnode. @@ -1572,75 +1641,49 @@ vclean(vp, flags, p) else vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); } - /* - * If purging an active vnode, it must be closed and - * deactivated before being reclaimed. Note that the - * VOP_INACTIVE will unlock the vnode. - */ - if (active) { - if (flags & DOCLOSE) - VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); + + if (active) VOP_INACTIVE(vp, p); - } else { - /* - * Any other processes trying to obtain this lock must first - * wait for VXLOCK to clear, then call the new lock operation. - */ + else VOP_UNLOCK(vp, 0, p); + + /* Destroy ubc named reference */ + if (didhold) { + ubc_rele(vp); + ubc_destroy_named(vp); + } + /* + * Make sure vp isn't on the inactive list. + */ + simple_lock(&vnode_free_list_slock); + if (ISSET((vp)->v_flag, VUINACTIVE)) { + VREMINACTIVE("vclean", vp); } + simple_unlock(&vnode_free_list_slock); + /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); - if (active) - vrele(vp); + + // make sure the name & parent ptrs get cleaned out! 
+ clean_up_name_parent_ptrs(vp); + cache_purge(vp); if (vp->v_vnlock) { - if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) + struct lock__bsd__ *tmp = vp->v_vnlock; + if ((tmp->lk_flags & LK_DRAINED) == 0) vprint("vclean: lock not drained", vp); - FREE_ZONE(vp->v_vnlock, sizeof (struct lock__bsd__), M_VNODE); vp->v_vnlock = NULL; + FREE(tmp, M_TEMP); } /* It's dead, Jim! */ vp->v_op = dead_vnodeop_p; vp->v_tag = VT_NON; - /* - * v_data is reclaimed by VOP_RECLAIM, all the vnode - * operation generated by the code below would be directed - * to the deadfs - */ - if (UBCINFOEXISTS(vp)) { - /* vnode is dying, destroy the object */ - if (ubc_issetflags(vp, UI_HASOBJREF)) { - obj = ubc_getobject(vp, UBC_NOREACTIVATE); - if (obj == NULL) - panic("vclean: null object"); - if (ISSET(vp->v_flag, VTERMINATE)) - panic("vclean: already teminating"); - SET(vp->v_flag, VTERMINATE); - - ubc_clearflags(vp, UI_HASOBJREF); - memory_object_destroy(obj, 0); - - /* - * memory_object_destroy() is asynchronous with respect - * to vnode_pager_no_senders(). - * wait for vnode_pager_no_senders() to clear - * VTERMINATE - */ - while (ISSET(vp->v_flag, VTERMINATE)) { - SET(vp->v_flag, VTERMWANT); - tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0); - } - if (UBCINFOEXISTS(vp)) { - ubc_info_free(vp); - vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */ - } - } - } + insmntque(vp, (struct mount *)0); /* * Done with purge, notify sleepers of the grim news. @@ -1650,6 +1693,9 @@ vclean(vp, flags, p) vp->v_flag &= ~VXWANT; wakeup((caddr_t)vp); } + + if (active) + vrele(vp); } /* @@ -1664,7 +1710,7 @@ vop_revoke(ap) } */ *ap; { struct vnode *vp, *vq; - struct proc *p = current_proc(); /* XXX */ + struct proc *p = current_proc(); #if DIAGNOSTIC if ((ap->a_flags & REVOKEALL) == 0) @@ -1683,7 +1729,7 @@ vop_revoke(ap) while (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); - tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); + (void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); } return (0); } @@ -1748,7 +1794,7 @@ void vgone(vp) struct vnode *vp; { - struct proc *p = current_proc(); /* XXX */ + struct proc *p = current_proc(); simple_lock(&vp->v_interlock); vgonel(vp, p); @@ -1782,7 +1828,7 @@ vgonel(vp, p) while (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); - tsleep((caddr_t)vp, PINOD, "vgone", 0); + (void)tsleep((caddr_t)vp, PINOD, "vgone", 0); } return; } @@ -1830,8 +1876,11 @@ vgonel(vp, p) vp->v_flag &= ~VALIASED; } simple_unlock(&spechash_slock); - FREE_ZONE(vp->v_specinfo, sizeof (struct specinfo), M_VNODE); + { + struct specinfo *tmp = vp->v_specinfo; vp->v_specinfo = NULL; + FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO); + } } /* * If it is on the freelist and not already at the head, @@ -1846,7 +1895,7 @@ vgonel(vp, p) * getnewvnode after removing it from the freelist to ensure * that we do not try to move it here. 
*/ - if (vp->v_usecount == 0) { + if (vp->v_usecount == 0 && (vp->v_flag & VUINACTIVE) == 0) { simple_lock(&vnode_free_list_slock); if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && vnode_free_list.tqh_first != vp) { @@ -1941,6 +1990,8 @@ vprint(label, vp) strcat(buf, "|VTEXT"); if (vp->v_flag & VSYSTEM) strcat(buf, "|VSYSTEM"); + if (vp->v_flag & VNOFLUSH) + strcat(buf, "|VNOFLUSH"); if (vp->v_flag & VXLOCK) strcat(buf, "|VXLOCK"); if (vp->v_flag & VXWANT) @@ -1967,7 +2018,7 @@ vprint(label, vp) void printlockedvnodes() { - struct proc *p = current_proc(); /* XXX */ + struct proc *p = current_proc(); struct mount *mp, *nmp; struct vnode *vp; @@ -1992,30 +2043,110 @@ printlockedvnodes() } #endif -/* - * Top level filesystem related information gathering. - */ -int -vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; +static int +build_path(struct vnode *vp, char *buff, int buflen, int *outlen) +{ + char *end, *str; + int i, len, ret=0, counter=0; + + end = &buff[buflen-1]; + *--end = '\0'; + + while(vp && VPARENT(vp) != vp) { + // the maximum depth of a file system hierarchy is MAXPATHLEN/2 + // (with single-char names separated by slashes). we panic if + // we've ever looped more than that. + if (counter++ > MAXPATHLEN/2) { + panic("build_path: vnode parent chain is too long! vp 0x%x\n", vp); + } + str = VNAME(vp); + if (VNAME(vp) == NULL) { + if (VPARENT(vp) != NULL) { + ret = EINVAL; + } + break; + } + + // count how long the string is + for(len=0; *str; str++, len++) + /* nothing */; + + // check that there's enough space + if ((end - buff) < len) { + ret = ENOSPC; + break; + } + + // copy it backwards + for(; len > 0; len--) { + *--end = *--str; + } + + // put in the path separator + *--end = '/'; + + // walk up the chain. + vp = VPARENT(vp); + + // check if we're crossing a mount point and + // switch the vp if we are. + if (vp && (vp->v_flag & VROOT)) { + vp = vp->v_mount->mnt_vnodecovered; + } + } + + // slide it down to the beginning of the buffer + memmove(buff, end, &buff[buflen] - end); + + *outlen = &buff[buflen] - end; + + return ret; +} + +__private_extern__ int +vn_getpath(struct vnode *vp, char *pathbuf, int *len) +{ + return build_path(vp, pathbuf, *len, len); +} + + + +/* + * Top level filesystem related information gathering. + */ +int +vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; void *oldp; size_t *oldlenp; void *newp; size_t newlen; struct proc *p; { - struct ctldebug *cdp; struct vfsconf *vfsp; + int *username; + u_int usernamelen; + int error; - if (name[0] == VFS_NUMMNTOPS) { + /* + * The VFS_NUMMNTOPS shouldn't be at name[0] since + * is a VFS generic variable. So now we must check + * namelen so we don't end up covering any UFS + * variables (sinc UFS vfc_typenum is 1). 
+ * + * It should have been: + * name[0]: VFS_GENERIC + * name[1]: VFS_NUMMNTOPS + */ + if (namelen == 1 && name[0] == VFS_NUMMNTOPS) { extern unsigned int vfs_nummntops; return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops)); } /* all sysctl names at this level are at least name and field */ if (namelen < 2) - return (ENOTDIR); /* overloaded */ + return (EISDIR); /* overloaded */ if (name[0] != VFS_GENERIC) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[0]) @@ -2039,7 +2170,19 @@ vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, sizeof(struct vfsconf))); } - return (EOPNOTSUPP); + /* + * We need to get back into the general MIB, so we need to re-prepend + * CTL_VFS to our name and try userland_sysctl(). + */ + usernamelen = namelen + 1; + MALLOC(username, int *, usernamelen * sizeof(*username), + M_TEMP, M_WAITOK); + bcopy(name, username + 1, namelen * sizeof(*name)); + username[0] = CTL_VFS; + error = userland_sysctl(p, username, usernamelen, oldp, oldlenp, 1, + newp, newlen, oldlenp); + FREE(username, M_TEMP); + return (error); } int kinfo_vdebug = 1; @@ -2096,13 +2239,16 @@ again: nvp = vp->v_mntvnodes.le_next; if (bp + VPTRSZ + VNODESZ > ewhere) { simple_unlock(&mntvnode_slock); + vfs_unbusy(mp, p); *sizep = bp - where; return (ENOMEM); } simple_unlock(&mntvnode_slock); if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || - (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) + (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) { + vfs_unbusy(mp, p); return (error); + } bp += VPTRSZ + VNODESZ; simple_lock(&mntvnode_slock); } @@ -2149,11 +2295,11 @@ vfs_mountedon(vp) * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ -void +__private_extern__ void vfs_unmountall() { struct mount *mp, *nmp; - struct proc *p = current_proc(); /* XXX */ + struct proc *p = current_proc(); /* * Since this only runs when rebooting, it is not interlocked. @@ -2166,7 +2312,7 @@ vfs_unmountall() /* * Build hash lists of net addresses and hang them off the mount point. - * Called by ufs_mount() to set up the lists of export addresses. + * Called by vfs_export() to set up the lists of export addresses. 
*/ static int vfs_hang_addrlist(mp, nep, argp) @@ -2343,7 +2489,7 @@ vfs_export_lookup(mp, nep, nam) * try to reclaim vnodes from the memory * object cache */ -int +static int vm_object_cache_reclaim(int count) { int cnt; @@ -2360,11 +2506,10 @@ vm_object_cache_reclaim(int count) * and then try to reclaim some vnodes from the memory * object cache */ -int +static int vnreclaim(int count) { - int cnt, i, loopcnt; - void *obj; + int i, loopcnt; struct vnode *vp; int err; struct proc *p; @@ -2390,163 +2535,133 @@ restart: for (vp = TAILQ_FIRST(&vnode_inactive_list); (vp != NULLVP) && (i < count); vp = TAILQ_NEXT(vp, v_freelist)) { + + if (!simple_lock_try(&vp->v_interlock)) + continue; - if (simple_lock_try(&vp->v_interlock)) { - if (vp->v_usecount != 1) - panic("vnreclaim: v_usecount"); - - if(!UBCINFOEXISTS(vp)) { - if (vp->v_type == VBAD) { - VREMINACTIVE("vnreclaim", vp); - simple_unlock(&vp->v_interlock); - continue; - } else - panic("non UBC vnode on inactive list"); - /* Should not reach here */ - } + if (vp->v_usecount != 1) + panic("vnreclaim: v_usecount"); - /* If vnode is already being reclaimed, wait */ - if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) { - vp->v_flag |= VXWANT; + if(!UBCINFOEXISTS(vp)) { + if (vp->v_type == VBAD) { + VREMINACTIVE("vnreclaim", vp); simple_unlock(&vp->v_interlock); - simple_unlock(&vnode_free_list_slock); - (void)tsleep((caddr_t)vp, PINOD, "vocr", 0); - goto restart; - } + continue; + } else + panic("non UBC vnode on inactive list"); + /* Should not reach here */ + } - VREMINACTIVE("vnreclaim", vp); + /* If vnode is already being reclaimed, wait */ + if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) { + vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); simple_unlock(&vnode_free_list_slock); + (void)tsleep((caddr_t)vp, PINOD, "vocr", 0); + goto restart; + } - /* held vnodes must not be reclaimed */ - if (vp->v_ubcinfo->ui_holdcnt) { /* XXX */ - vinactive(vp); - simple_unlock(&vp->v_interlock); - goto restart; - } + /* + * if the vnode is being initialized, + * skip over it + */ + if (ISSET(vp->v_flag, VUINIT)) { + SET(vp->v_flag, VUWANT); + simple_unlock(&vp->v_interlock); + continue; + } - if (ubc_issetflags(vp, UI_WASMAPPED)) { - /* - * We should not reclaim as it is likely - * to be in use. Let it die a natural death. - * Release the UBC reference if one exists - * and put it back at the tail. - */ - if (ubc_issetflags(vp, UI_HASOBJREF)) { - obj = ubc_getobject(vp, UBC_NOREACTIVATE); - if (obj == NULL) - panic("vnreclaim: null object"); - /* release the reference gained by ubc_info_init() */ - ubc_clearflags(vp, UI_HASOBJREF); - simple_unlock(&vp->v_interlock); - vm_object_deallocate(obj); - /* - * The vnode interlock was release. - * vm_object_deallocate() might have blocked. - * It is possible that the object was terminated. - * It is also possible that the vnode was - * reactivated. Evaluate the state again. - */ - if (UBCINFOEXISTS(vp)) { - simple_lock(&vp->v_interlock); - if ((vp->v_usecount == 1) && !VONLIST(vp)) - vinactive(vp); - simple_unlock(&vp->v_interlock); - } - } else { - vinactive(vp); - simple_unlock(&vp->v_interlock); - } - } else { - VORECLAIM_ENABLE(vp); + VREMINACTIVE("vnreclaim", vp); + simple_unlock(&vnode_free_list_slock); - /* - * scrub the dirty pages and invalidate the buffers - */ - p = current_proc(); - err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p); - if (err) { - /* cannot reclaim */ + if (ubc_issetflags(vp, UI_WASMAPPED)) { + /* + * We should not reclaim as it is likely + * to be in use. 
Let it die a natural death. + * Release the UBC reference if one exists + * and put it back at the tail. + */ + simple_unlock(&vp->v_interlock); + if (ubc_release_named(vp)) { + if (UBCINFOEXISTS(vp)) { simple_lock(&vp->v_interlock); - vinactive(vp); - VORECLAIM_DISABLE(vp); + if (vp->v_usecount == 1 && !VONLIST(vp)) + vinactive(vp); simple_unlock(&vp->v_interlock); - goto restart; } + } else { + simple_lock(&vp->v_interlock); + vinactive(vp); + simple_unlock(&vp->v_interlock); + } + } else { + int didhold; + + VORECLAIM_ENABLE(vp); + + /* + * scrub the dirty pages and invalidate the buffers + */ + p = current_proc(); + err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p); + if (err) { + /* cannot reclaim */ simple_lock(&vp->v_interlock); - if(vp->v_usecount != 1) - panic("VOCR: usecount race"); + vinactive(vp); + VORECLAIM_DISABLE(vp); + i++; simple_unlock(&vp->v_interlock); + goto restart; + } - /* - * If the UBC reference on the memory object - * was already lost, regain it. This will - * keep the memory object alive for rest of the - * reclaim and finally this reference would - * be lost by memory_object_destroy() - */ - obj = ubc_getobject(vp, (UBC_NOREACTIVATE|UBC_HOLDOBJECT)); - if (obj == (void *)NULL) - panic("vnreclaim: null object"); + /* keep the vnode alive so we can kill it */ + simple_lock(&vp->v_interlock); + if(vp->v_usecount != 1) + panic("VOCR: usecount race"); + vp->v_usecount++; + simple_unlock(&vp->v_interlock); - /* clean up the state in VM without invalidating */ + /* clean up the state in VM without invalidating */ + didhold = ubc_hold(vp); + if (didhold) (void)ubc_clean(vp, 0); - /* flush and invalidate buffers associated with the vnode */ - if (vp->v_tag == VT_NFS) - nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0); - else - vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); - - /* - * It is not necessary to call ubc_uncache() - * here because memory_object_destroy() marks - * the memory object non cachable already - * - * Need to release the vnode lock before calling - * vm_object_deallocate() to avoid deadlock - * when the vnode goes through vop_inactive - * - * Note: for the v_usecount == 1 case, VOP_INACTIVE - * has not yet been called. Call it now while vp is - * still locked, it will also release the lock. - */ - if (vp->v_usecount == 1) - VOP_INACTIVE(vp, p); - else - VOP_UNLOCK(vp, 0, p); - - /* - * This vnode is ready to be reclaimed. - * Terminate the memory object. - * memory_object_destroy() will result in - * vnode_pager_no_senders(). - * That will release the pager reference - * and the vnode will move to the free list. - */ - if (ISSET(vp->v_flag, VTERMINATE)) - panic("vnreclaim: already teminating"); - SET(vp->v_flag, VTERMINATE); + /* flush and invalidate buffers associated with the vnode */ + if (vp->v_tag == VT_NFS) + nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0); + else + vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); - memory_object_destroy(obj, 0); + /* + * Note: for the v_usecount == 2 case, VOP_INACTIVE + * has not yet been called. Call it now while vp is + * still locked, it will also release the lock. + */ + if (vp->v_usecount == 2) + VOP_INACTIVE(vp, p); + else + VOP_UNLOCK(vp, 0, p); - /* - * memory_object_destroy() is asynchronous with respect - * to vnode_pager_no_senders(). 
- * wait for vnode_pager_no_senders() to clear - * VTERMINATE - */ - while (ISSET(vp->v_flag, VTERMINATE)) { - SET(vp->v_flag, VTERMWANT); - tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vnreclaim", 0); - } - simple_lock(&vp->v_interlock); - VORECLAIM_DISABLE(vp); - i++; - simple_unlock(&vp->v_interlock); + if (didhold) + ubc_rele(vp); + + /* + * destroy the ubc named reference. + * If we can't because it is held for I/Os + * in progress, just put it back on the inactive + * list and move on. Otherwise, the paging reference + * is toast (and so is this vnode?). + */ + if (ubc_destroy_named(vp)) { + i++; } - /* inactive list lock was released, must restart */ - goto restart; + simple_lock(&vp->v_interlock); + VORECLAIM_DISABLE(vp); + simple_unlock(&vp->v_interlock); + vrele(vp); /* release extra use we added here */ } + /* inactive list lock was released, must restart */ + goto restart; } simple_unlock(&vnode_free_list_slock); @@ -2566,16 +2681,13 @@ out: * AGE the vnode so that it gets recycled quickly. * Check lock status to decide whether to call vput() or vrele(). */ -void +__private_extern__ void vnode_pager_vrele(struct vnode *vp) { boolean_t funnel_state; int isvnreclaim = 1; - if (vp == (struct vnode *) NULL) - panic("vnode_pager_vrele: null vp"); - funnel_state = thread_funnel_set(kernel_flock, TRUE); /* Mark the vnode to be recycled */ @@ -2612,23 +2724,26 @@ vnode_pager_vrele(struct vnode *vp) } if (!ISSET(vp->v_flag, VTERMINATE)) SET(vp->v_flag, VTERMINATE); + + cache_purge(vp); + if (UBCINFOEXISTS(vp)) { + struct ubc_info *uip = vp->v_ubcinfo; + if (ubc_issetflags(vp, UI_WASMAPPED)) SET(vp->v_flag, VWASMAPPED); - if ((vp->v_ubcinfo->ui_holdcnt) /* XXX */ - && !(vp->v_flag & VXLOCK)) - panic("vnode_pager_vrele: freeing held ubc_info"); - - simple_unlock(&vp->v_interlock); - ubc_info_free(vp); vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */ + simple_unlock(&vp->v_interlock); + ubc_info_deallocate(uip); } else { if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL) && ((vp)->v_ubcinfo != UBC_NOINFO)) { - simple_unlock(&vp->v_interlock); - ubc_info_free(vp); + struct ubc_info *uip = vp->v_ubcinfo; + vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */ + simple_unlock(&vp->v_interlock); + ubc_info_deallocate(uip); } else { simple_unlock(&vp->v_interlock); } @@ -2659,7 +2774,6 @@ int walk_vnodes_debug=0; void walk_allvnodes() { - struct proc *p = current_proc(); /* XXX */ struct mount *mp, *nmp; struct vnode *vp; int cnt = 0; @@ -2697,3 +2811,482 @@ walk_allvnodes() printf("%d - inactive\n", cnt); } #endif /* DIAGNOSTIC */ + + +struct x_constraints { + u_int32_t x_maxreadcnt; + u_int32_t x_maxsegreadsize; + u_int32_t x_maxsegwritesize; +}; + + +void +vfs_io_attributes(vp, flags, iosize, vectors) + struct vnode *vp; + int flags; /* B_READ or B_WRITE */ + int *iosize; + int *vectors; +{ + struct mount *mp; + + /* start with "reasonable" defaults */ + *iosize = MAXPHYS; + *vectors = 32; + + mp = vp->v_mount; + if (mp != NULL) { + switch (flags) { + case B_READ: + if (mp->mnt_kern_flag & MNTK_IO_XINFO) + *iosize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt; + else + *iosize = mp->mnt_maxreadcnt; + *vectors = mp->mnt_segreadcnt; + break; + case B_WRITE: + *iosize = mp->mnt_maxwritecnt; + *vectors = mp->mnt_segwritecnt; + break; + default: + break; + } + if (*iosize == 0) + *iosize = MAXPHYS; + if (*vectors == 0) + *vectors = 32; + } + return; +} + +__private_extern__ +void +vfs_io_maxsegsize(vp, flags, maxsegsize) + struct vnode *vp; + int flags; /* B_READ or 
B_WRITE */ + int *maxsegsize; +{ + struct mount *mp; + + /* start with "reasonable" default */ + *maxsegsize = MAXPHYS; + + mp = vp->v_mount; + if (mp != NULL) { + switch (flags) { + case B_READ: + if (mp->mnt_kern_flag & MNTK_IO_XINFO) + *maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize; + else + /* + * if the extended info doesn't exist + * then use the maxread I/O size as the + * max segment size... this is the previous behavior + */ + *maxsegsize = mp->mnt_maxreadcnt; + break; + case B_WRITE: + if (mp->mnt_kern_flag & MNTK_IO_XINFO) + *maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize; + else + /* + * if the extended info doesn't exist + * then use the maxwrite I/O size as the + * max segment size... this is the previous behavior + */ + *maxsegsize = mp->mnt_maxwritecnt; + break; + default: + break; + } + if (*maxsegsize == 0) + *maxsegsize = MAXPHYS; + } +} + + +#include + + +int +vfs_init_io_attributes(devvp, mp) + struct vnode *devvp; + struct mount *mp; +{ + int error; + off_t readblockcnt; + off_t writeblockcnt; + off_t readmaxcnt; + off_t writemaxcnt; + off_t readsegcnt; + off_t writesegcnt; + off_t readsegsize; + off_t writesegsize; + u_long blksize; + + u_int64_t temp; + + struct proc *p = current_proc(); + struct ucred *cred = p->p_ucred; + + int isvirtual = 0; + /* + * determine if this mount point exists on the same device as the root + * partition... if so, then it comes under the hard throttle control + */ + int thisunit = -1; + static int rootunit = -1; + extern struct vnode *rootvp; + + if (rootunit == -1) { + if (VOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, cred, p)) + rootunit = -1; + else if (rootvp == devvp) + mp->mnt_kern_flag |= MNTK_ROOTDEV; + } + if (devvp != rootvp && rootunit != -1) { + if (VOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, cred, p) == 0) { + if (thisunit == rootunit) + mp->mnt_kern_flag |= MNTK_ROOTDEV; + } + } + if (VOP_IOCTL(devvp, DKIOCGETISVIRTUAL, (caddr_t)&isvirtual, 0, cred, p) == 0) { + if (isvirtual) + mp->mnt_kern_flag |= MNTK_VIRTUALDEV; + } + + if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD, + (caddr_t)&readblockcnt, 0, cred, p))) + return (error); + + if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE, + (caddr_t)&writeblockcnt, 0, cred, p))) + return (error); + + if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD, + (caddr_t)&readmaxcnt, 0, cred, p))) + return (error); + + if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE, + (caddr_t)&writemaxcnt, 0, cred, p))) + return (error); + + if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD, + (caddr_t)&readsegcnt, 0, cred, p))) + return (error); + + if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE, + (caddr_t)&writesegcnt, 0, cred, p))) + return (error); + + if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD, + (caddr_t)&readsegsize, 0, cred, p))) + return (error); + + if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, + (caddr_t)&writesegsize, 0, cred, p))) + return (error); + + if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, + (caddr_t)&blksize, 0, cred, p))) + return (error); + + + if ( !(mp->mnt_kern_flag & MNTK_IO_XINFO)) { + MALLOC(mp->mnt_xinfo_ptr, void *, sizeof(struct x_constraints), M_TEMP, M_WAITOK); + mp->mnt_kern_flag |= MNTK_IO_XINFO; + } + + if (readmaxcnt) + temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt; + else { + if (readblockcnt) { + temp = readblockcnt * blksize; + temp = (temp > UINT32_MAX) ? 
UINT32_MAX : temp; + } else + temp = MAXPHYS; + } + ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt = (u_int32_t)temp; + + if (writemaxcnt) + temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt; + else { + if (writeblockcnt) { + temp = writeblockcnt * blksize; + temp = (temp > UINT32_MAX) ? UINT32_MAX : temp; + } else + temp = MAXPHYS; + } + mp->mnt_maxwritecnt = (u_int32_t)temp; + + if (readsegcnt) { + temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt; + mp->mnt_segreadcnt = (u_int16_t)temp; + } + if (writesegcnt) { + temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt; + mp->mnt_segwritecnt = (u_int16_t)temp; + } + if (readsegsize) + temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize; + else + temp = mp->mnt_maxreadcnt; + ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize = (u_int32_t)temp; + + if (writesegsize) + temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize; + else + temp = mp->mnt_maxwritecnt; + ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize = (u_int32_t)temp; + + return (error); +} + +static struct klist fs_klist; + +void +vfs_event_init(void) +{ + + klist_init(&fs_klist); +} + +void +vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data) +{ + + KNOTE(&fs_klist, event); +} + +/* + * return the number of mounted filesystems. + */ +static int +sysctl_vfs_getvfscnt(void) +{ + struct mount *mp; + int ret = 0; + + simple_lock(&mountlist_slock); + CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) + ret++; + simple_unlock(&mountlist_slock); + return (ret); +} + +/* + * fill in the array of fsid_t's up to a max of 'count', the actual + * number filled in will be set in '*actual'. If there are more fsid_t's + * than room in fsidlst then ENOMEM will be returned and '*actual' will + * have the actual count. + * having *actual filled out even in the error case is depended upon. + */ +static int +sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual) +{ + struct mount *mp; + + *actual = 0; + simple_lock(&mountlist_slock); + CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { + (*actual)++; + if (*actual <= count) + fsidlst[(*actual) - 1] = mp->mnt_stat.f_fsid; + } + simple_unlock(&mountlist_slock); + return (*actual <= count ? 0 : ENOMEM); +} + +static int +sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS +{ + int actual, error; + size_t space; + fsid_t *fsidlst; + + /* This is a readonly node. */ + if (req->newptr != NULL) + return (EPERM); + + /* they are querying us so just return the space required. */ + if (req->oldptr == NULL) { + req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t); + return 0; + } +again: + /* + * Retrieve an accurate count of the amount of space required to copy + * out all the fsids in the system. + */ + space = req->oldlen; + req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t); + + /* they didn't give us enough space. */ + if (space < req->oldlen) + return (ENOMEM); + + MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK); + error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t), + &actual); + /* + * If we get back ENOMEM, then another mount has been added while we + * slept in malloc above. If this is the case then try again. + */ + if (error == ENOMEM) { + FREE(fsidlst, M_TEMP); + req->oldlen = space; + goto again; + } + if (error == 0) { + error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t)); + } + FREE(fsidlst, M_TEMP); + return (error); +} + +/* + * Do a sysctl by fsid. 
+ */ +static int +sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS +{ + struct vfsidctl vc; + struct mount *mp; + struct statfs *sp; + struct proc *p; + int *name; + int error, flags, namelen; + + name = arg1; + namelen = arg2; + p = req->p; + + error = SYSCTL_IN(req, &vc, sizeof(vc)); + if (error) + return (error); + if (vc.vc_vers != VFS_CTL_VERS1) + return (EINVAL); + mp = vfs_getvfs(&vc.vc_fsid); + if (mp == NULL) + return (ENOENT); + /* reset so that the fs specific code can fetch it. */ + req->newidx = 0; + /* + * Note if this is a VFS_CTL then we pass the actual sysctl req + * in for "oldp" so that the lower layer can DTRT and use the + * SYSCTL_IN/OUT routines. + */ + if (mp->mnt_op->vfs_sysctl != NULL) { + error = mp->mnt_op->vfs_sysctl(name, namelen, + req, NULL, NULL, 0, req->p); + if (error != EOPNOTSUPP) + return (error); + } + switch (name[0]) { + case VFS_CTL_UMOUNT: + VCTLTOREQ(&vc, req); + error = SYSCTL_IN(req, &flags, sizeof(flags)); + if (error) + break; + error = safedounmount(mp, flags, p); + break; + case VFS_CTL_STATFS: + VCTLTOREQ(&vc, req); + error = SYSCTL_IN(req, &flags, sizeof(flags)); + if (error) + break; + sp = &mp->mnt_stat; + if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) && + (error = VFS_STATFS(mp, sp, p))) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + error = SYSCTL_OUT(req, sp, sizeof(*sp)); + break; + default: + return (EOPNOTSUPP); + } + return (error); +} + +static int filt_fsattach(struct knote *kn); +static void filt_fsdetach(struct knote *kn); +static int filt_fsevent(struct knote *kn, long hint); + +struct filterops fs_filtops = + { 0, filt_fsattach, filt_fsdetach, filt_fsevent }; + +static int +filt_fsattach(struct knote *kn) +{ + + kn->kn_flags |= EV_CLEAR; + KNOTE_ATTACH(&fs_klist, kn); + return (0); +} + +static void +filt_fsdetach(struct knote *kn) +{ + + KNOTE_DETACH(&fs_klist, kn); +} + +static int +filt_fsevent(struct knote *kn, long hint) +{ + + kn->kn_fflags |= hint; + return (kn->kn_fflags != 0); +} + +static int +sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS +{ + int out, error; + pid_t pid; + size_t space; + struct proc *p; + + /* We need a pid. */ + if (req->newptr == NULL) + return (EINVAL); + + error = SYSCTL_IN(req, &pid, sizeof(pid)); + if (error) + return (error); + + p = pfind(pid < 0 ? -pid : pid); + if (p == NULL) + return (ESRCH); + + /* + * Fetching the value is ok, but we only fetch if the old + * pointer is given. + */ + if (req->oldptr != NULL) { + out = !((p->p_flag & P_NOREMOTEHANG) == 0); + error = SYSCTL_OUT(req, &out, sizeof(out)); + return (error); + } + + /* cansignal offers us enough security. */ + if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0) + return (EPERM); + + if (pid < 0) + p->p_flag &= ~P_NOREMOTEHANG; + else + p->p_flag |= P_NOREMOTEHANG; + + return (0); +} +/* the vfs.generic. branch. */ +SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge"); +/* retreive a list of mounted filesystem fsid_t */ +SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD, + 0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids"); +/* perform operations on filesystem via fsid_t */ +SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW, + sysctl_vfs_ctlbyfsid, "ctlbyfsid"); +SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW, + 0, 0, sysctl_vfs_noremotehang, "I", "noremotehang"); +
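reset_vmobjectcache() is a good example of the small correctness fixes mixed into this diff: the old body computed val2 - VNODE_FREE_MIN directly, and since both operands are unsigned the result wraps to a huge number whenever val2 drops below VNODE_FREE_MIN, instead of clamping to zero the way the patched code does. A minimal user-space sketch of the failure mode and the fix, with illustrative names only, looks like this:

#include <stdint.h>
#include <stdio.h>

#define VNODE_FREE_MIN 300   /* mirrors the constant this diff raises from 100 to 300 */

/* naive form: unsigned subtraction wraps when val2 < VNODE_FREE_MIN */
static uint32_t cache_target_naive(uint32_t val2)
{
    return val2 - VNODE_FREE_MIN;
}

/* clamped form, as the patched reset_vmobjectcache() now does */
static uint32_t cache_target_clamped(uint32_t val2)
{
    return (val2 < VNODE_FREE_MIN) ? 0 : val2 - VNODE_FREE_MIN;
}

int main(void)
{
    printf("naive(100)   = %u\n", (unsigned)cache_target_naive(100));   /* wraps to ~4.29e9 */
    printf("clamped(100) = %u\n", (unsigned)cache_target_clamped(100)); /* 0 */
    printf("clamped(500) = %u\n", (unsigned)cache_target_clamped(500)); /* 200 */
    return 0;
}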
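build_path(), the helper behind the new vn_getpath(), assembles a vnode's path by walking the VPARENT() chain, copying each VNAME() component backwards from the tail of the caller's buffer, and finally sliding the finished string to the front with memmove(). The same backwards-assembly technique, stripped of the vnode and mount-point crossing details, can be sketched in user space; the node type and names below are invented for illustration:

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct node {              /* stand-in for a vnode with name/parent pointers */
    const char  *name;
    struct node *parent;   /* points at itself at the root, ending the walk */
};

/* Build the path for 'n' backwards into buff[buflen], as build_path() does. */
static int build_path(struct node *n, char *buff, int buflen, int *outlen)
{
    char *end = &buff[buflen - 1];

    *end = '\0';
    while (n != NULL && n->parent != n) {
        int len = (int)strlen(n->name);

        if ((end - buff) < len + 1)    /* room for the component plus '/' */
            return ENOSPC;
        end -= len;
        memcpy(end, n->name, len);     /* copy the component in front of what we have */
        *--end = '/';                  /* then the separator */
        n = n->parent;
    }
    *outlen = (int)(&buff[buflen] - end);
    memmove(buff, end, *outlen);       /* slide the finished path to the front */
    return 0;
}

int main(void)
{
    struct node root = { "", NULL };
    struct node usr  = { "usr", &root };
    struct node bin  = { "bin", &usr };
    char path[64];
    int len;

    root.parent = &root;
    if (build_path(&bin, path, sizeof(path), &len) == 0)
        printf("%s (%d bytes incl. NUL)\n", path, len);   /* prints /usr/bin */
    return 0;
}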
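sysctl_vfs_vfslist() has to copy out a list that can grow while it runs: it sizes the buffer from sysctl_vfs_getvfscnt(), fills it with sysctl_vfs_getvfslist(), and if the fill reports ENOMEM because a mount appeared in between, it restores the caller's length and starts over. The sketch below shows the same snapshot-and-retry idea against a toy registry; every name in it is made up for illustration:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* A toy registry whose size can change between the size query and the copy,
 * standing in for the mount list walked by sysctl_vfs_vfslist(). */
static int registry_size = 3;
static int grow_once = 1;

static int registry_count(void) { return registry_size; }

/* Fill up to 'count' entries, report the true number in '*actual', and
 * return ENOMEM if the registry no longer fits, as sysctl_vfs_getvfslist() does. */
static int registry_fill(int *buf, int count, int *actual)
{
    if (grow_once) {            /* simulate an entry appearing mid-snapshot */
        grow_once = 0;
        registry_size += 2;
    }
    *actual = registry_size;
    if (*actual > count)
        return ENOMEM;
    for (int i = 0; i < *actual; i++)
        buf[i] = i;
    return 0;
}

/* Snapshot with retry: size, allocate, fill, and start over if it grew. */
static int *snapshot(int *n)
{
    for (;;) {
        int count = registry_count();
        int *buf = malloc((size_t)count * sizeof(*buf));
        int actual;

        if (buf == NULL)
            return NULL;
        if (registry_fill(buf, count, &actual) == 0) {
            *n = actual;
            return buf;
        }
        free(buf);              /* raced with growth: drop it and retry */
    }
}

int main(void)
{
    int n;
    int *ids = snapshot(&n);

    printf("snapshot of %d entries\n", n);
    free(ids);
    return 0;
}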
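vfs_event_init(), vfs_event_signal(), and fs_filtops give filesystem code a knote list to post events on; in shipped xnu this is what the EVFILT_FS kqueue filter is wired to. Assuming that wiring and a BSD/macOS <sys/event.h> that defines EVFILT_FS, a minimal user-space consumer would look roughly like this (error handling trimmed):

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    int kq = kqueue();
    struct kevent kev;

    if (kq < 0)
        return 1;
    /* ident is unused for EVFILT_FS; EV_CLEAR matches what filt_fsattach() forces */
    EV_SET(&kev, 0, EVFILT_FS, EV_ADD | EV_CLEAR, 0, 0, NULL);
    if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0)
        return 1;

    /* block until vfs_event_signal() posts a filesystem event */
    if (kevent(kq, NULL, 0, &kev, 1, NULL) == 1)
        printf("fs event, fflags=0x%x\n", (unsigned)kev.fflags);
    close(kq);
    return 0;
}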