/*
- * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* External virtual filesystem routines
*/
+#undef DIAGNOSTIC
#define DIAGNOSTIC 1
#include <sys/param.h>
#include <sys/ubc.h>
#include <sys/vm.h>
#include <sys/sysctl.h>
+#include <sys/filedesc.h>
+#include <sys/event.h>
+
+#include <string.h>
+#include <machine/spl.h>
+
#include <kern/assert.h>
extern kern_return_t
adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
-/*
- * Insq/Remq for the vnode usage lists.
- */
-#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
-#define bufremvn(bp) { \
- LIST_REMOVE(bp, b_vnbufs); \
- (bp)->b_vnbufs.le_next = NOLIST; \
-}
-
TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */
struct mntlist mountlist; /* mounted filesystem list */
#define VORECLAIM_ENABLE(vp) \
do { \
if (ISSET((vp)->v_flag, VORECLAIM)) \
- panic("vm object raclaim already"); \
+ panic("vm_object_reclaim already"); \
SET((vp)->v_flag, VORECLAIM); \
} while(0)
reset_vmobjectcache(unsigned int val1, unsigned int val2)
{
vm_size_t oval = val1 - VNODE_FREE_MIN;
- vm_size_t nval = val2 - VNODE_FREE_MIN;
+ vm_size_t nval;
+
+ if(val2 < VNODE_FREE_MIN)
+ nval = 0;
+ else
+ nval = val2 - VNODE_FREE_MIN;
return(adjust_vm_object_cache(oval, nval));
}
register struct mount *mp;
simple_lock(&mountlist_slock);
- for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
- mp = mp->mnt_list.cqe_next) {
+ CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
simple_unlock(&mountlist_slock);
++xxxfs_mntid;
tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
tfsid.val[1] = mtype;
- if (mountlist.cqh_first != (void *)&mountlist) {
+ if (!CIRCLEQ_EMPTY(&mountlist)) {
while (vfs_getvfs(&tfsid)) {
tfsid.val[0]++;
xxxfs_mntid++;
simple_unlock(&vp->v_interlock);
reclaimhits++;
} else
- break;
- }
+ break;
+ }
}
/*
else
vp->v_ubcinfo = 0;
+ if (vp->v_flag & VHASDIRTY)
+ cluster_release(vp);
+
+ // make sure all these fields are cleared out as the
+ // name/parent stuff uses them and assumes they're
+ // cleared to null/0.
+ if (vp->v_scmap != NULL) {
+ panic("getnewvnode: vp @ 0x%x has non-null scmap.\n", vp);
+ }
+ vp->v_un.vu_name = NULL;
+ vp->v_scdirty = 0;
+ vp->v_un1.v_cl.v_pad = 0;
+
+
vp->v_lastr = -1;
vp->v_ralen = 0;
vp->v_maxra = 0;
- vp->v_lastw = 0;
vp->v_ciosiz = 0;
- vp->v_cstart = 0;
vp->v_clen = 0;
vp->v_socket = 0;
+ /* we may have blocked, re-evaluate state */
+ simple_lock(&vnode_free_list_slock);
+ if (VONLIST(vp)) {
+ if (vp->v_usecount == 0)
+ VREMFREE("getnewvnode", vp);
+ else if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("getnewvnode", vp);
+ }
+ simple_unlock(&vnode_free_list_slock);
+
done:
vp->v_flag = VSTANDARD;
vp->v_type = VNON;
simple_unlock(&mntvnode_slock);
}
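+/*
+ * Drop the outstanding-output count on a vnode and wake up any thread
+ * sleeping in VBWAIT/VTHROTTLED once that count has drained to zero.
+ * A NULL vp is tolerated and treated as a no-op.
+ */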
+__inline void
+vpwakeup(struct vnode *vp)
+{
+ if (vp) {
+ if (--vp->v_numoutput < 0)
+ panic("vpwakeup: neg numoutput");
+ if ((vp->v_flag & VBWAIT || vp->v_flag & VTHROTTLED)
+ && vp->v_numoutput <= 0) {
+ vp->v_flag &= ~(VBWAIT|VTHROTTLED);
+ wakeup((caddr_t)&vp->v_numoutput);
+ }
+ }
+}
+
/*
* Update outstanding I/O count and do wakeup if requested.
*/
vwakeup(bp)
register struct buf *bp;
{
- register struct vnode *vp;
-
CLR(bp->b_flags, B_WRITEINPROG);
- if (vp = bp->b_vp) {
- if (--vp->v_numoutput < 0)
- panic("vwakeup: neg numoutput");
- if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
- if (vp->v_numoutput < 0)
- panic("vwakeup: neg numoutput 2");
- vp->v_flag &= ~VBWAIT;
- wakeup((caddr_t)&vp->v_numoutput);
- }
- }
+ vpwakeup(bp->b_vp);
}
/*
if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
return (error);
}
- if (vp->v_dirtyblkhd.lh_first != NULL || (vp->v_flag & VHASDIRTY))
- panic("vinvalbuf: dirty bufs");
+ if (vp->v_dirtyblkhd.lh_first)
+ panic("vinvalbuf: dirty bufs (vp 0x%x, bp 0x%x)", vp, vp->v_dirtyblkhd.lh_first);
}
for (;;) {
- if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
+ if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
while (blist && blist->b_lblkno < 0)
blist = blist->b_vnbufs.le_next;
if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
for (bp = blist; bp; bp = nbp) {
nbp = bp->b_vnbufs.le_next;
- if (flags & V_SAVEMETA && bp->b_lblkno < 0)
+ if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
continue;
s = splbio();
if (ISSET(bp->b_flags, B_BUSY)) {
(void) VOP_BWRITE(bp);
break;
}
- SET(bp->b_flags, B_INVAL);
+
+ if (bp->b_flags & B_LOCKED) {
+ panic("vinvalbuf: bp @ 0x%x is locked!", bp);
+ break;
+ } else {
+ SET(bp->b_flags, B_INVAL);
+ }
brelse(bp);
}
}
return (0);
}
-/*
- * Associate a buffer with a vnode.
- */
-void
-bgetvp(vp, bp)
- register struct vnode *vp;
- register struct buf *bp;
-{
-
- if (bp->b_vp)
- panic("bgetvp: not free");
- VHOLD(vp);
- bp->b_vp = vp;
- if (vp->v_type == VBLK || vp->v_type == VCHR)
- bp->b_dev = vp->v_rdev;
- else
- bp->b_dev = NODEV;
- /*
- * Insert onto list for new vnode.
- */
- bufinsvn(bp, &vp->v_cleanblkhd);
-}
-
-/*
- * Disassociate a buffer from a vnode.
- */
-void
-brelvp(bp)
- register struct buf *bp;
-{
- struct vnode *vp;
-
- if (bp->b_vp == (struct vnode *) 0)
- panic("brelvp: NULL");
- /*
- * Delete from old vnode list, if on one.
- */
- if (bp->b_vnbufs.le_next != NOLIST)
- bufremvn(bp);
- vp = bp->b_vp;
- bp->b_vp = (struct vnode *) 0;
- HOLDRELE(vp);
-}
-
-/*
- * Reassign a buffer from one vnode to another.
- * Used to assign file specific control information
- * (indirect blocks) to the vnode to which they belong.
- */
-void
-reassignbuf(bp, newvp)
- register struct buf *bp;
- register struct vnode *newvp;
-{
- register struct buflists *listheadp;
-
- if (newvp == NULL) {
- printf("reassignbuf: NULL");
- return;
- }
- /*
- * Delete from old vnode list, if on one.
- */
- if (bp->b_vnbufs.le_next != NOLIST)
- bufremvn(bp);
- /*
- * If dirty, put on list of dirty buffers;
- * otherwise insert onto list of clean buffers.
- */
- if (ISSET(bp->b_flags, B_DELWRI))
- listheadp = &newvp->v_dirtyblkhd;
- else
- listheadp = &newvp->v_cleanblkhd;
- bufinsvn(bp, listheadp);
-}
-
/*
* Create a vnode for a block device.
* Used for root filesystem, argdev, and swap areas.
struct proc *p = current_proc(); /* XXX */
struct vnode *vp;
struct vnode **vpp;
- struct specinfo * bufhold;
- int buffree = 1;
+ struct specinfo *specinfop;
if (nvp->v_type != VBLK && nvp->v_type != VCHR)
return (NULLVP);
- bufhold = (struct specinfo *)_MALLOC_ZONE(sizeof(struct specinfo),
- M_VNODE, M_WAITOK);
+ MALLOC_ZONE(specinfop, struct specinfo *, sizeof(struct specinfo),
+ M_SPECINFO, M_WAITOK);
vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
simple_lock(&spechash_slock);
break;
}
if (vp == NULL || vp->v_tag != VT_NON) {
- nvp->v_specinfo = bufhold;
- buffree = 0; /* buffer used */
+ nvp->v_specinfo = specinfop;
+ specinfop = 0; /* buffer used */
bzero(nvp->v_specinfo, sizeof(struct specinfo));
nvp->v_rdev = nvp_rdev;
nvp->v_hashchain = vpp;
vp->v_tag = nvp->v_tag;
nvp->v_type = VNON;
insmntque(vp, mp);
- if (buffree)
- _FREE_ZONE((void *)bufhold, sizeof (struct specinfo), M_VNODE);
+ if (specinfop)
+ FREE_ZONE((void *)specinfop, sizeof(struct specinfo), M_SPECINFO);
return (vp);
}
struct proc *p;
{
int error = 0;
+ u_long vpid;
+
+ vpid = vp->v_id; // save off the original v_id
+
+retry:
/*
* If the vnode is in the process of being cleaned out for
if (ISSET(vp->v_flag, VTERMINATE)) {
SET(vp->v_flag, VTERMWANT);
simple_unlock(&vp->v_interlock);
- (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
+ (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vget1", 0);
return (ENOENT);
}
+ /*
+ * if the vnode is being initialized,
+ * wait for it to finish initialization
+ */
+ if (ISSET(vp->v_flag, VUINIT)) {
+ SET(vp->v_flag, VUWANT);
+ simple_unlock(&vp->v_interlock);
+ (void) tsleep((caddr_t)vp, PINOD, "vget2", 0);
+ goto retry;
+ }
+
simple_lock(&vnode_free_list_slock);
- if (vp->v_usecount == 0) {
- /* If on the free list, remove it from there */
- if (VONLIST(vp))
+ if (VONLIST(vp)) {
+ if (vp->v_usecount == 0)
VREMFREE("vget", vp);
- } else {
- /* If on the inactive list, remove it from there */
- if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
- if (VONLIST(vp))
- VREMINACTIVE("vget", vp);
- }
+ else if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("vget", vp);
}
-
- /* The vnode should not be on the inactive list here */
- VINACTIVECHECK("vget", vp, 0);
-
simple_unlock(&vnode_free_list_slock);
if (++vp->v_usecount <= 0)
*/
if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) {
simple_unlock(&vp->v_interlock);
- if (ubc_getobject(vp, UBC_HOLDOBJECT)) {
+ if (ubc_getobject(vp, UBC_HOLDOBJECT) == MEMORY_OBJECT_CONTROL_NULL) {
error = ENOENT;
goto errout;
}
if (flags & LK_TYPE_MASK) {
if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
goto errout;
+ if (vpid != vp->v_id) { // make sure it's still the same vnode
+ vput(vp);
+ return ENOENT;
+ }
return (0);
}
if ((flags & LK_INTERLOCK) == 0)
simple_unlock(&vp->v_interlock);
+
+ if (vpid != vp->v_id) { // make sure it's still the same vnode
+ vrele(vp);
+ return ENOENT;
+ }
+
return (0);
errout:
+ simple_lock(&vp->v_interlock);
+
+ /*
+ * we may have blocked. Re-evaluate the state
+ */
+ simple_lock(&vnode_free_list_slock);
+ if (VONLIST(vp)) {
+ if (vp->v_usecount == 0)
+ VREMFREE("vget", vp);
+ else if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("vget", vp);
+ }
+ simple_unlock(&vnode_free_list_slock);
+
/*
* If the vnode was not active in the first place
* must not call vrele() as VOP_INACTIVE() is not
* required.
* So inlined part of vrele() here.
*/
- simple_lock(&vp->v_interlock);
if (--vp->v_usecount == 1) {
if (UBCINFOEXISTS(vp)) {
vinactive(vp);
* Get a pager reference on the particular vnode.
*
 * This is called from ubc_info_init() and it is assumed that
- * the vnode is neither on the free list on on the inactive list.
+ * the vnode is not on the free list.
* It is also assumed that the vnode is neither being recycled
* by vgonel nor being terminated by vnode_pager_vrele().
*
struct vnode *vp;
{
simple_lock(&vp->v_interlock);
- if (UBCINFOMISSING(vp))
- panic("vnode_pager_vget: stolen ubc_info");
- if (!UBCINFOEXISTS(vp))
- panic("vnode_pager_vget: lost ubc_info");
+ UBCINFOCHECK("vnode_pager_vget", vp);
- if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM))
- panic("vnode_pager_vget: already being reclaimd");
-
- if (ISSET(vp->v_flag, VTERMINATE))
- panic("vnode_pager_vget: already being terminated");
+ if (ISSET(vp->v_flag, (VXLOCK|VORECLAIM|VTERMINATE)))
+ panic("%s: dying vnode", "vnode_pager_vget");
simple_lock(&vnode_free_list_slock);
- /* The vnode should not be on ANY list */
- if (VONLIST(vp))
- panic("vnode_pager_vget: still on the list");
+ /* The vnode should not be on free list */
+ if (VONLIST(vp)) {
+ if (vp->v_usecount == 0)
+ panic("%s: still on list", "vnode_pager_vget");
+ else if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("vnode_pager_vget", vp);
+ }
/* The vnode should not be on the inactive list here */
- VINACTIVECHECK("vnode_pager_vget", vp, 0);
simple_unlock(&vnode_free_list_slock);
/* After all those checks, now do the real work :-) */
if (vp->v_vnlock == NULL) {
if ((flags & LK_TYPE_MASK) == LK_DRAIN)
return (0);
- MALLOC_ZONE(vp->v_vnlock, struct lock__bsd__ *,
- sizeof(struct lock__bsd__), M_VNODE, M_WAITOK);
+ MALLOC(vp->v_vnlock, struct lock__bsd__ *,
+ sizeof(struct lock__bsd__), M_TEMP, M_WAITOK);
lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
}
switch (flags & LK_TYPE_MASK) {
panic("vref used where vget required");
/* If on the inactive list, remove it from there */
- if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
- if (VONLIST(vp)) {
- simple_lock(&vnode_free_list_slock);
- VREMINACTIVE("vref", vp);
- simple_unlock(&vnode_free_list_slock);
- }
- }
- /* The vnode should not be on the inactive list here */
- VINACTIVECHECK("vref", vp, 0);
+ simple_lock(&vnode_free_list_slock);
+ if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("vref", vp);
+ simple_unlock(&vnode_free_list_slock);
if (++vp->v_usecount <= 0)
panic("vref v_usecount");
simple_unlock(&vp->v_interlock);
}
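+/*
+ * Clear a vnode's name/parent pointers, then drop the name string and
+ * the reference on the parent.  The fields are NULLed out before the
+ * calls to remove_name() and vrele(), which may block.
+ */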
+static void
+clean_up_name_parent_ptrs(struct vnode *vp)
+{
+ if (VNAME(vp) || VPARENT(vp)) {
+ char *tmp1;
+ struct vnode *tmp2;
+
+ // do it this way so we don't block before clearing
+ // these fields.
+ tmp1 = VNAME(vp);
+ tmp2 = VPARENT(vp);
+ VNAME(vp) = NULL;
+ VPARENT(vp) = NULL;
+
+ if (tmp1) {
+ remove_name(tmp1);
+ }
+
+ if (tmp2) {
+ vrele(tmp2);
+ }
+ }
+}
+
+
/*
* put the vnode on appropriate free list.
* called with v_interlock held.
vfree(vp)
struct vnode *vp;
{
+ funnel_t *curflock;
+ extern int disable_funnel;
+
+ if ((curflock = thread_funnel_get()) != kernel_flock &&
+ !(disable_funnel && curflock != THR_FUNNEL_NULL))
+ panic("Entering vfree() without kernel funnel");
+
/*
* if the vnode is not obtained by calling getnewvnode() we
* are not responsible for the cleanup. Just return.
/* insert at tail of LRU list or at head if VAGE is set */
simple_lock(&vnode_free_list_slock);
+ // make sure the name & parent pointers get cleared out
+// clean_up_name_parent_ptrs(vp);
+
if (VONLIST(vp))
- panic("vfree: vnode still on list");
+ panic("%s: vnode still on list", "vfree");
if (vp->v_flag & VAGE) {
TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
vinactive(vp)
struct vnode *vp;
{
+ funnel_t *curflock;
+ extern int disable_funnel;
+
+ if ((curflock = thread_funnel_get()) != kernel_flock &&
+ !(disable_funnel && curflock != THR_FUNNEL_NULL))
+ panic("Entering vinactive() without kernel funnel");
+
if (!UBCINFOEXISTS(vp))
panic("vinactive: not a UBC vnode");
simple_lock(&vnode_free_list_slock);
if (VONLIST(vp))
- panic("vinactive: vnode still on list");
+ panic("%s: vnode still on list", "vinactive");
VINACTIVECHECK("vinactive", vp, 0);
TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
vp->v_usecount, vp->v_writecount);
}
#endif
- if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
- VREMINACTIVE("vrele", vp);
+ simple_lock(&vnode_free_list_slock);
+ if (ISSET((vp)->v_flag, VUINACTIVE))
+		VREMINACTIVE("vput", vp);
+ simple_unlock(&vnode_free_list_slock);
simple_unlock(&vp->v_interlock);
VOP_INACTIVE(vp, p);
struct vnode *vp;
{
struct proc *p = current_proc(); /* XXX */
+ funnel_t *curflock;
+ extern int disable_funnel;
+
+ if ((curflock = thread_funnel_get()) != kernel_flock &&
+ !(disable_funnel && curflock != THR_FUNNEL_NULL))
+ panic("Entering vrele() without kernel funnel");
simple_lock(&vp->v_interlock);
if (--vp->v_usecount == 1) {
if (UBCINFOEXISTS(vp)) {
- vinactive(vp);
+ if ((vp->v_flag & VXLOCK) == 0)
+ vinactive(vp);
simple_unlock(&vp->v_interlock);
return;
}
panic("vrele: ref cnt");
}
#endif
- if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
- VREMINACTIVE("vrele", vp);
-
if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
/* vnode is being cleaned, just return */
simple_lock(&vp->v_interlock);
/*
- * Skip over a vnodes marked VSYSTEM.
+	 * Skip over vnodes marked VSYSTEM or VNOFLUSH.
*/
- if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
+ if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) {
simple_unlock(&vp->v_interlock);
continue;
}
busy++;
}
simple_unlock(&mntvnode_slock);
- if (busy)
+ if (busy && ((flags & FORCECLOSE)==0))
return (EBUSY);
return (0);
}
struct proc *p;
{
int active;
- void *obj;
- kern_return_t kret;
- int removed = 0;
int didhold;
/*
* so that its count cannot fall to zero and generate a
* race against ourselves to recycle it.
*/
- if (active = vp->v_usecount)
+ if (active = vp->v_usecount) {
+		/*
+		 * An active vnode can not be on the free list.
+		 * We are about to take an extra reference on this vnode,
+		 * so do the queue management as needed.
+		 * Not doing so can cause a "still on list" or
+		 * "vnreclaim: v_usecount" panic if VOP_LOCK() blocks.
+		 */
+ simple_lock(&vnode_free_list_slock);
+ if (ISSET((vp)->v_flag, VUINACTIVE))
+ VREMINACTIVE("vclean", vp);
+ simple_unlock(&vnode_free_list_slock);
+
if (++vp->v_usecount <= 0)
panic("vclean: v_usecount");
+ }
+
/*
* Prevent the vnode from being recycled or
* brought into use while we clean it out.
VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
/*
+	 * While blocked in VOP_LOCK() someone could have dropped
+	 * reference[s] and we could land on the inactive list.
	 * If this vnode is on the inactive list,
	 * take it off the list.
*/
- if ((active == 1) &&
- (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))) {
- simple_lock(&vnode_free_list_slock);
+ simple_lock(&vnode_free_list_slock);
+ if (ISSET((vp)->v_flag, VUINACTIVE))
VREMINACTIVE("vclean", vp);
- simple_unlock(&vnode_free_list_slock);
- removed++;
- }
+ simple_unlock(&vnode_free_list_slock);
/* Clean the pages in VM. */
if (active && (flags & DOCLOSE))
*/
if (flags & DOCLOSE) {
if (vp->v_tag == VT_NFS)
- nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
- else
- vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
- }
+ nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
+ else
+ vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
+ }
if (active)
VOP_INACTIVE(vp, p);
VOP_UNLOCK(vp, 0, p);
/* Destroy ubc named reference */
- if (didhold) {
- ubc_rele(vp);
+ if (didhold) {
+ ubc_rele(vp);
ubc_destroy_named(vp);
}
+ /*
+ * Make sure vp isn't on the inactive list.
+ */
+ simple_lock(&vnode_free_list_slock);
+ if (ISSET((vp)->v_flag, VUINACTIVE)) {
+ VREMINACTIVE("vclean", vp);
+ }
+ simple_unlock(&vnode_free_list_slock);
/*
* Reclaim the vnode.
*/
if (VOP_RECLAIM(vp, p))
panic("vclean: cannot reclaim");
+
+ // make sure the name & parent ptrs get cleaned out!
+ clean_up_name_parent_ptrs(vp);
+
cache_purge(vp);
if (vp->v_vnlock) {
- if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
+ struct lock__bsd__ *tmp = vp->v_vnlock;
+ if ((tmp->lk_flags & LK_DRAINED) == 0)
vprint("vclean: lock not drained", vp);
- FREE_ZONE(vp->v_vnlock, sizeof (struct lock__bsd__), M_VNODE);
vp->v_vnlock = NULL;
+ FREE(tmp, M_TEMP);
}
/* It's dead, Jim! */
vp->v_op = dead_vnodeop_p;
vp->v_tag = VT_NON;
+ insmntque(vp, (struct mount *)0);
+
/*
* Done with purge, notify sleepers of the grim news.
*/
vp->v_flag &= ~VALIASED;
}
simple_unlock(&spechash_slock);
- FREE_ZONE(vp->v_specinfo, sizeof (struct specinfo), M_VNODE);
+ {
+ struct specinfo *tmp = vp->v_specinfo;
vp->v_specinfo = NULL;
+ FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO);
+ }
}
/*
* If it is on the freelist and not already at the head,
* getnewvnode after removing it from the freelist to ensure
* that we do not try to move it here.
*/
- if (vp->v_usecount == 0) {
+ if (vp->v_usecount == 0 && (vp->v_flag & VUINACTIVE) == 0) {
simple_lock(&vnode_free_list_slock);
if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
vnode_free_list.tqh_first != vp) {
strcat(buf, "|VTEXT");
if (vp->v_flag & VSYSTEM)
strcat(buf, "|VSYSTEM");
+ if (vp->v_flag & VNOFLUSH)
+ strcat(buf, "|VNOFLUSH");
if (vp->v_flag & VXLOCK)
strcat(buf, "|VXLOCK");
if (vp->v_flag & VXWANT)
}
#endif
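+/*
+ * Build the path name of a vnode by walking up the VNAME()/VPARENT()
+ * chain, copying each component backwards into 'buff' and switching to
+ * mnt_vnodecovered when a mount root is crossed.  On return the path
+ * sits at the start of the buffer and '*outlen' holds its length,
+ * including the trailing NUL.  Returns EINVAL if a vnode in the chain
+ * has a parent but no name, or ENOSPC if the buffer is too small.
+ */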
+static int
+build_path(struct vnode *vp, char *buff, int buflen, int *outlen)
+{
+ char *end, *str;
+ int i, len, ret=0, counter=0;
+
+ end = &buff[buflen-1];
+	*end = '\0';
+
+ while(vp && VPARENT(vp) != vp) {
+ // the maximum depth of a file system hierarchy is MAXPATHLEN/2
+ // (with single-char names separated by slashes). we panic if
+ // we've ever looped more than that.
+ if (counter++ > MAXPATHLEN/2) {
+ panic("build_path: vnode parent chain is too long! vp 0x%x\n", vp);
+ }
+ str = VNAME(vp);
+ if (VNAME(vp) == NULL) {
+ if (VPARENT(vp) != NULL) {
+ ret = EINVAL;
+ }
+ break;
+ }
+
+ // count how long the string is
+ for(len=0; *str; str++, len++)
+ /* nothing */;
+
+ // check that there's enough space
+ if ((end - buff) < len) {
+ ret = ENOSPC;
+ break;
+ }
+
+ // copy it backwards
+ for(; len > 0; len--) {
+ *--end = *--str;
+ }
+
+ // put in the path separator
+ *--end = '/';
+
+ // walk up the chain.
+ vp = VPARENT(vp);
+
+ // check if we're crossing a mount point and
+ // switch the vp if we are.
+ if (vp && (vp->v_flag & VROOT)) {
+ vp = vp->v_mount->mnt_vnodecovered;
+ }
+ }
+
+ // slide it down to the beginning of the buffer
+ memmove(buff, end, &buff[buflen] - end);
+
+ *outlen = &buff[buflen] - end;
+
+ return ret;
+}
+
+__private_extern__ int
+vn_getpath(struct vnode *vp, char *pathbuf, int *len)
+{
+ return build_path(vp, pathbuf, *len, len);
+}
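+/*
+ * Illustrative usage: a caller wanting the path for a vnode might do
+ *
+ *	char path[MAXPATHLEN];
+ *	int  len = sizeof(path);
+ *	int  error = vn_getpath(vp, path, &len);
+ *
+ * On success "path" holds the NUL-terminated path and "len" its length,
+ * including the terminator ("path"/"len" are illustrative names).
+ */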
+
+
+
/*
* Top level filesystem related information gathering.
*/
size_t newlen;
struct proc *p;
{
- struct ctldebug *cdp;
struct vfsconf *vfsp;
+ int *username;
+ u_int usernamelen;
+ int error;
- if (name[0] == VFS_NUMMNTOPS) {
+ /*
+	 * The VFS_NUMMNTOPS shouldn't be at name[0] since it
+	 * is a VFS generic variable. So now we must check
+	 * namelen so we don't end up covering any UFS
+	 * variables (since UFS vfc_typenum is 1).
+ *
+ * It should have been:
+ * name[0]: VFS_GENERIC
+ * name[1]: VFS_NUMMNTOPS
+ */
+ if (namelen == 1 && name[0] == VFS_NUMMNTOPS) {
extern unsigned int vfs_nummntops;
return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
}
/* all sysctl names at this level are at least name and field */
if (namelen < 2)
- return (ENOTDIR); /* overloaded */
+ return (EISDIR); /* overloaded */
if (name[0] != VFS_GENERIC) {
for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
if (vfsp->vfc_typenum == name[0])
return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
sizeof(struct vfsconf)));
}
- return (EOPNOTSUPP);
+ /*
+ * We need to get back into the general MIB, so we need to re-prepend
+ * CTL_VFS to our name and try userland_sysctl().
+ */
+ usernamelen = namelen + 1;
+ MALLOC(username, int *, usernamelen * sizeof(*username),
+ M_TEMP, M_WAITOK);
+ bcopy(name, username + 1, namelen * sizeof(*name));
+ username[0] = CTL_VFS;
+ error = userland_sysctl(p, username, usernamelen, oldp, oldlenp, 1,
+ newp, newlen, oldlenp);
+ FREE(username, M_TEMP);
+ return (error);
}
int kinfo_vdebug = 1;
nvp = vp->v_mntvnodes.le_next;
if (bp + VPTRSZ + VNODESZ > ewhere) {
simple_unlock(&mntvnode_slock);
+ vfs_unbusy(mp, p);
*sizep = bp - where;
return (ENOMEM);
}
simple_unlock(&mntvnode_slock);
if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
- (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
+ (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) {
+ vfs_unbusy(mp, p);
return (error);
+ }
bp += VPTRSZ + VNODESZ;
simple_lock(&mntvnode_slock);
}
static int
vnreclaim(int count)
{
- int cnt, i, loopcnt;
- void *obj;
+ int i, loopcnt;
struct vnode *vp;
int err;
struct proc *p;
- kern_return_t kret;
i = 0;
loopcnt = 0;
goto restart;
}
+ /*
+ * if the vnode is being initialized,
+ * skip over it
+ */
+ if (ISSET(vp->v_flag, VUINIT)) {
+ SET(vp->v_flag, VUWANT);
+ simple_unlock(&vp->v_interlock);
+ continue;
+ }
+
VREMINACTIVE("vnreclaim", vp);
simple_unlock(&vnode_free_list_slock);
boolean_t funnel_state;
int isvnreclaim = 1;
- if (vp == (struct vnode *) NULL)
- panic("vnode_pager_vrele: null vp");
-
funnel_state = thread_funnel_set(kernel_flock, TRUE);
/* Mark the vnode to be recycled */
}
if (!ISSET(vp->v_flag, VTERMINATE))
SET(vp->v_flag, VTERMINATE);
+
+ cache_purge(vp);
+
if (UBCINFOEXISTS(vp)) {
struct ubc_info *uip = vp->v_ubcinfo;
void
walk_allvnodes()
{
- struct proc *p = current_proc();
struct mount *mp, *nmp;
struct vnode *vp;
int cnt = 0;
}
#endif /* DIAGNOSTIC */
+
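+/*
+ * Extended per-mount I/O constraints, hung off mp->mnt_xinfo_ptr when
+ * MNTK_IO_XINFO is set: byte-count limits that do not fit in the
+ * existing mount fields.
+ */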
+struct x_constraints {
+ u_int32_t x_maxreadcnt;
+ u_int32_t x_maxsegreadsize;
+ u_int32_t x_maxsegwritesize;
+};
+
+
void
vfs_io_attributes(vp, flags, iosize, vectors)
struct vnode *vp;
if (mp != NULL) {
switch (flags) {
case B_READ:
- *iosize = mp->mnt_maxreadcnt;
+ if (mp->mnt_kern_flag & MNTK_IO_XINFO)
+ *iosize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt;
+ else
+ *iosize = mp->mnt_maxreadcnt;
*vectors = mp->mnt_segreadcnt;
break;
case B_WRITE:
default:
break;
}
+ if (*iosize == 0)
+ *iosize = MAXPHYS;
+ if (*vectors == 0)
+ *vectors = 32;
}
-
return;
}
-#include <dev/disk.h>
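+/*
+ * Return the maximum scatter/gather segment size (in bytes) for reads
+ * or writes on the vnode's mount point.  Falls back to the mount's max
+ * I/O size when no extended info is present, and to MAXPHYS when
+ * nothing is known.
+ */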
+__private_extern__
+void
+vfs_io_maxsegsize(vp, flags, maxsegsize)
+ struct vnode *vp;
+ int flags; /* B_READ or B_WRITE */
+ int *maxsegsize;
+{
+ struct mount *mp;
+
+ /* start with "reasonable" default */
+ *maxsegsize = MAXPHYS;
+
+ mp = vp->v_mount;
+ if (mp != NULL) {
+ switch (flags) {
+ case B_READ:
+ if (mp->mnt_kern_flag & MNTK_IO_XINFO)
+ *maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize;
+ else
+ /*
+ * if the extended info doesn't exist
+ * then use the maxread I/O size as the
+ * max segment size... this is the previous behavior
+ */
+ *maxsegsize = mp->mnt_maxreadcnt;
+ break;
+ case B_WRITE:
+ if (mp->mnt_kern_flag & MNTK_IO_XINFO)
+ *maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize;
+ else
+ /*
+ * if the extended info doesn't exist
+ * then use the maxwrite I/O size as the
+ * max segment size... this is the previous behavior
+ */
+ *maxsegsize = mp->mnt_maxwritecnt;
+ break;
+ default:
+ break;
+ }
+ if (*maxsegsize == 0)
+ *maxsegsize = MAXPHYS;
+ }
+}
+
+
+#include <sys/disk.h>
+
int
vfs_init_io_attributes(devvp, mp)
int error;
off_t readblockcnt;
off_t writeblockcnt;
+ off_t readmaxcnt;
+ off_t writemaxcnt;
off_t readsegcnt;
off_t writesegcnt;
+ off_t readsegsize;
+ off_t writesegsize;
u_long blksize;
u_int64_t temp;
struct proc *p = current_proc();
struct ucred *cred = p->p_ucred;
+ int isvirtual = 0;
+ /*
+ * determine if this mount point exists on the same device as the root
+ * partition... if so, then it comes under the hard throttle control
+ */
+ int thisunit = -1;
+ static int rootunit = -1;
+ extern struct vnode *rootvp;
+
+ if (rootunit == -1) {
+ if (VOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, cred, p))
+ rootunit = -1;
+ else if (rootvp == devvp)
+ mp->mnt_kern_flag |= MNTK_ROOTDEV;
+ }
+ if (devvp != rootvp && rootunit != -1) {
+ if (VOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, cred, p) == 0) {
+ if (thisunit == rootunit)
+ mp->mnt_kern_flag |= MNTK_ROOTDEV;
+ }
+ }
+ if (VOP_IOCTL(devvp, DKIOCGETISVIRTUAL, (caddr_t)&isvirtual, 0, cred, p) == 0) {
+ if (isvirtual)
+ mp->mnt_kern_flag |= MNTK_VIRTUALDEV;
+ }
+
if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
(caddr_t)&readblockcnt, 0, cred, p)))
return (error);
(caddr_t)&writeblockcnt, 0, cred, p)))
return (error);
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD,
+ (caddr_t)&readmaxcnt, 0, cred, p)))
+ return (error);
+
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE,
+ (caddr_t)&writemaxcnt, 0, cred, p)))
+ return (error);
+
if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
(caddr_t)&readsegcnt, 0, cred, p)))
return (error);
(caddr_t)&writesegcnt, 0, cred, p)))
return (error);
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD,
+ (caddr_t)&readsegsize, 0, cred, p)))
+ return (error);
+
+ if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE,
+ (caddr_t)&writesegsize, 0, cred, p)))
+ return (error);
+
if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
(caddr_t)&blksize, 0, cred, p)))
return (error);
- temp = readblockcnt * blksize;
- temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
- mp->mnt_maxreadcnt = (u_int32_t)temp;
- temp = writeblockcnt * blksize;
- temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
+ if ( !(mp->mnt_kern_flag & MNTK_IO_XINFO)) {
+ MALLOC(mp->mnt_xinfo_ptr, void *, sizeof(struct x_constraints), M_TEMP, M_WAITOK);
+ mp->mnt_kern_flag |= MNTK_IO_XINFO;
+ }
+
+ if (readmaxcnt)
+ temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt;
+ else {
+ if (readblockcnt) {
+ temp = readblockcnt * blksize;
+ temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
+ } else
+ temp = MAXPHYS;
+ }
+ ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt = (u_int32_t)temp;
+
+ if (writemaxcnt)
+ temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt;
+ else {
+ if (writeblockcnt) {
+ temp = writeblockcnt * blksize;
+ temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
+ } else
+ temp = MAXPHYS;
+ }
mp->mnt_maxwritecnt = (u_int32_t)temp;
- temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
- mp->mnt_segreadcnt = (u_int16_t)temp;
+ if (readsegcnt) {
+ temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
+ mp->mnt_segreadcnt = (u_int16_t)temp;
+ }
+ if (writesegcnt) {
+ temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
+ mp->mnt_segwritecnt = (u_int16_t)temp;
+ }
+ if (readsegsize)
+ temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize;
+ else
+ temp = mp->mnt_maxreadcnt;
+ ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize = (u_int32_t)temp;
- temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
- mp->mnt_segwritecnt = (u_int16_t)temp;
+ if (writesegsize)
+ temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize;
+ else
+ temp = mp->mnt_maxwritecnt;
+ ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize = (u_int32_t)temp;
-#if 0
- printf("--- IO attributes for mount point 0x%08x ---\n", mp);
- printf("\tmnt_maxreadcnt = 0x%x", mp->mnt_maxreadcnt);
- printf("\tmnt_maxwritecnt = 0x%x\n", mp->mnt_maxwritecnt);
- printf("\tmnt_segreadcnt = 0x%x", mp->mnt_segreadcnt);
- printf("\tmnt_segwritecnt = 0x%x\n", mp->mnt_segwritecnt);
-#endif /* 0 */
+ return (error);
+}
+
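+/*
+ * List of knotes attached through fs_filtops (the filesystem event
+ * filter); vfs_event_signal() posts events to it.
+ */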
+static struct klist fs_klist;
+
+void
+vfs_event_init(void)
+{
+
+ klist_init(&fs_klist);
+}
+
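+/*
+ * Post a filesystem event to any attached knotes.  Only the event bits
+ * are delivered; the fsid and data arguments are not used here.
+ */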
+void
+vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data)
+{
+
+ KNOTE(&fs_klist, event);
+}
+
+/*
+ * return the number of mounted filesystems.
+ */
+static int
+sysctl_vfs_getvfscnt(void)
+{
+ struct mount *mp;
+ int ret = 0;
+
+ simple_lock(&mountlist_slock);
+ CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
+ ret++;
+ simple_unlock(&mountlist_slock);
+ return (ret);
+}
+
+/*
+ * Fill in the array of fsid_t's up to a max of 'count'; the actual
+ * number filled in will be set in '*actual'.  If there are more fsid_t's
+ * than room in fsidlst then ENOMEM will be returned and '*actual' will
+ * have the actual count.
+ * Having '*actual' filled out even in the error case is depended upon.
+ */
+static int
+sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual)
+{
+ struct mount *mp;
+
+ *actual = 0;
+ simple_lock(&mountlist_slock);
+ CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
+ (*actual)++;
+ if (*actual <= count)
+ fsidlst[(*actual) - 1] = mp->mnt_stat.f_fsid;
+ }
+ simple_unlock(&mountlist_slock);
+ return (*actual <= count ? 0 : ENOMEM);
+}
+
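+/*
+ * Sysctl handler that copies out the list of mounted filesystem fsids.
+ * The copy-out buffer is sized from sysctl_vfs_getvfscnt(); if another
+ * filesystem is mounted while the buffer is being allocated, the
+ * operation is retried.
+ */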
+static int
+sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS
+{
+ int actual, error;
+ size_t space;
+ fsid_t *fsidlst;
+
+ /* This is a readonly node. */
+ if (req->newptr != NULL)
+ return (EPERM);
+
+ /* they are querying us so just return the space required. */
+ if (req->oldptr == NULL) {
+ req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
+ return 0;
+ }
+again:
+ /*
+ * Retrieve an accurate count of the amount of space required to copy
+ * out all the fsids in the system.
+ */
+ space = req->oldlen;
+ req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
+
+ /* they didn't give us enough space. */
+ if (space < req->oldlen)
+ return (ENOMEM);
+ MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK);
+ error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t),
+ &actual);
+ /*
+ * If we get back ENOMEM, then another mount has been added while we
+ * slept in malloc above. If this is the case then try again.
+ */
+ if (error == ENOMEM) {
+ FREE(fsidlst, M_TEMP);
+ req->oldlen = space;
+ goto again;
+ }
+ if (error == 0) {
+ error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t));
+ }
+ FREE(fsidlst, M_TEMP);
+ return (error);
+}
+
+/*
+ * Do a sysctl by fsid.
+ */
+static int
+sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS
+{
+ struct vfsidctl vc;
+ struct mount *mp;
+ struct statfs *sp;
+ struct proc *p;
+ int *name;
+ int error, flags, namelen;
+
+ name = arg1;
+ namelen = arg2;
+ p = req->p;
+
+ error = SYSCTL_IN(req, &vc, sizeof(vc));
+ if (error)
+ return (error);
+ if (vc.vc_vers != VFS_CTL_VERS1)
+ return (EINVAL);
+ mp = vfs_getvfs(&vc.vc_fsid);
+ if (mp == NULL)
+ return (ENOENT);
+ /* reset so that the fs specific code can fetch it. */
+ req->newidx = 0;
+ /*
+ * Note if this is a VFS_CTL then we pass the actual sysctl req
+ * in for "oldp" so that the lower layer can DTRT and use the
+ * SYSCTL_IN/OUT routines.
+ */
+ if (mp->mnt_op->vfs_sysctl != NULL) {
+ error = mp->mnt_op->vfs_sysctl(name, namelen,
+ req, NULL, NULL, 0, req->p);
+ if (error != EOPNOTSUPP)
+ return (error);
+ }
+ switch (name[0]) {
+ case VFS_CTL_UMOUNT:
+ VCTLTOREQ(&vc, req);
+ error = SYSCTL_IN(req, &flags, sizeof(flags));
+ if (error)
+ break;
+ error = safedounmount(mp, flags, p);
+ break;
+ case VFS_CTL_STATFS:
+ VCTLTOREQ(&vc, req);
+ error = SYSCTL_IN(req, &flags, sizeof(flags));
+ if (error)
+ break;
+ sp = &mp->mnt_stat;
+ if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) &&
+ (error = VFS_STATFS(mp, sp, p)))
+ return (error);
+ sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ error = SYSCTL_OUT(req, sp, sizeof(*sp));
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
return (error);
}
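+/*
+ * knote filter for filesystem events: attach/detach knotes on fs_klist
+ * and accumulate event bits in kn_fflags.
+ */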
+static int filt_fsattach(struct knote *kn);
+static void filt_fsdetach(struct knote *kn);
+static int filt_fsevent(struct knote *kn, long hint);
+
+struct filterops fs_filtops =
+ { 0, filt_fsattach, filt_fsdetach, filt_fsevent };
+
+static int
+filt_fsattach(struct knote *kn)
+{
+
+ kn->kn_flags |= EV_CLEAR;
+ KNOTE_ATTACH(&fs_klist, kn);
+ return (0);
+}
+
+static void
+filt_fsdetach(struct knote *kn)
+{
+
+ KNOTE_DETACH(&fs_klist, kn);
+}
+
+static int
+filt_fsevent(struct knote *kn, long hint)
+{
+
+ kn->kn_fflags |= hint;
+ return (kn->kn_fflags != 0);
+}
+
+static int
+sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS
+{
+ int out, error;
+ pid_t pid;
+ size_t space;
+ struct proc *p;
+
+ /* We need a pid. */
+ if (req->newptr == NULL)
+ return (EINVAL);
+
+ error = SYSCTL_IN(req, &pid, sizeof(pid));
+ if (error)
+ return (error);
+
+ p = pfind(pid < 0 ? -pid : pid);
+ if (p == NULL)
+ return (ESRCH);
+
+ /*
+ * Fetching the value is ok, but we only fetch if the old
+ * pointer is given.
+ */
+ if (req->oldptr != NULL) {
+ out = !((p->p_flag & P_NOREMOTEHANG) == 0);
+ error = SYSCTL_OUT(req, &out, sizeof(out));
+ return (error);
+ }
+
+ /* cansignal offers us enough security. */
+ if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0)
+ return (EPERM);
+
+ if (pid < 0)
+ p->p_flag &= ~P_NOREMOTEHANG;
+ else
+ p->p_flag |= P_NOREMOTEHANG;
+
+ return (0);
+}
+/* the vfs.generic. branch. */
+SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge");
+/* retrieve a list of mounted filesystem fsid_t */
+SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD,
+ 0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids");
+/* perform operations on filesystem via fsid_t */
+SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW,
+ sysctl_vfs_ctlbyfsid, "ctlbyfsid");
+SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW,
+ 0, 0, sysctl_vfs_noremotehang, "I", "noremotehang");
+