apple/xnu xnu-517.3.15: bsd/vfs/vfs_subr.c
1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
26 /*
27 * Copyright (c) 1989, 1993
28 * The Regents of the University of California. All rights reserved.
29 * (c) UNIX System Laboratories, Inc.
30 * All or some portions of this file are derived from material licensed
31 * to the University of California by American Telephone and Telegraph
32 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
33 * the permission of UNIX System Laboratories, Inc.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
64 */
65
66 /*
67 * External virtual filesystem routines
68 */
69
70 #undef DIAGNOSTIC
71 #define DIAGNOSTIC 1
72
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/proc.h>
76 #include <sys/mount.h>
77 #include <sys/time.h>
78 #include <sys/vnode.h>
79 #include <sys/stat.h>
80 #include <sys/namei.h>
81 #include <sys/ucred.h>
82 #include <sys/buf.h>
83 #include <sys/errno.h>
84 #include <sys/malloc.h>
85 #include <sys/domain.h>
86 #include <sys/mbuf.h>
87 #include <sys/syslog.h>
88 #include <sys/ubc.h>
89 #include <sys/vm.h>
90 #include <sys/sysctl.h>
91 #include <sys/filedesc.h>
92 #include <sys/event.h>
93
94 #include <string.h>
95 #include <machine/spl.h>
96
97
98 #include <kern/assert.h>
99
100 #include <miscfs/specfs/specdev.h>
101
102 #include <mach/mach_types.h>
103 #include <mach/memory_object_types.h>
104
105
106 enum vtype iftovt_tab[16] = {
107 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
108 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
109 };
110 int vttoif_tab[9] = {
111 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
112 S_IFSOCK, S_IFIFO, S_IFMT,
113 };
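/*
 * These tables are indexed via the IFTOVT() and VTTOIF() macros
 * (see <sys/vnode.h>): iftovt_tab[] maps the S_IFMT file-type bits of an
 * inode mode (shifted right by 12) to a vnode type, and vttoif_tab[] maps
 * a vnode type back to its S_IF* bits.  For example, S_IFDIR (0040000)
 * indexes slot 4 and yields VDIR.
 */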
114
115 static void vfree(struct vnode *vp);
116 static void vinactive(struct vnode *vp);
117 static int vnreclaim(int count);
118 extern kern_return_t
119 adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
120
121 TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
122 TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */
123 struct mntlist mountlist; /* mounted filesystem list */
124
125 #if DIAGNOSTIC
126 #define VLISTCHECK(fun, vp, list) \
127 if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
128 panic("%s: %s vnode not on %slist", (fun), (list), (list));
129
130 #define VINACTIVECHECK(fun, vp, expected) \
131 do { \
132 int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE); \
133 if (__is_inactive ^ expected) \
134 panic("%s: %sinactive vnode, expected %s", (fun), \
135 __is_inactive? "" : "not ", \
136 expected? "inactive": "not inactive"); \
137 } while(0)
138 #else
139 #define VLISTCHECK(fun, vp, list)
140 #define VINACTIVECHECK(fun, vp, expected)
141 #endif /* DIAGNOSTIC */
142
143 #define VLISTNONE(vp) \
144 do { \
145 (vp)->v_freelist.tqe_next = (struct vnode *)0; \
146 (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb; \
147 } while(0)
148
149 #define VONLIST(vp) \
150 ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
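/*
 * In other words, a vnode that is on neither the free list nor the
 * inactive list has its v_freelist back pointer set to the 0xdeadb
 * sentinel by VLISTNONE(); VONLIST() simply tests for that sentinel.
 */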
151
152 /* remove a vnode from free vnode list */
153 #define VREMFREE(fun, vp) \
154 do { \
155 VLISTCHECK((fun), (vp), "free"); \
156 TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist); \
157 VLISTNONE((vp)); \
158 freevnodes--; \
159 } while(0)
160
161 /* remove a vnode from inactive vnode list */
162 #define VREMINACTIVE(fun, vp) \
163 do { \
164 VLISTCHECK((fun), (vp), "inactive"); \
165 VINACTIVECHECK((fun), (vp), VUINACTIVE); \
166 TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \
167 CLR((vp)->v_flag, VUINACTIVE); \
168 VLISTNONE((vp)); \
169 inactivevnodes--; \
170 } while(0)
171
172 #define VORECLAIM_ENABLE(vp) \
173 do { \
174 if (ISSET((vp)->v_flag, VORECLAIM)) \
175 panic("vm_object_reclaim already"); \
176 SET((vp)->v_flag, VORECLAIM); \
177 } while(0)
178
179 #define VORECLAIM_DISABLE(vp) \
180 do { \
181 CLR((vp)->v_flag, VORECLAIM); \
182 if (ISSET((vp)->v_flag, VXWANT)) { \
183 CLR((vp)->v_flag, VXWANT); \
184 wakeup((caddr_t)(vp)); \
185 } \
186 } while(0)
187
188 /*
189 * Have to declare the first two locks as actual data even if !MACH_SLOCKS,
190 * since pointers to them get passed around.
191 */
192 simple_lock_data_t mountlist_slock;
193 simple_lock_data_t mntvnode_slock;
194 decl_simple_lock_data(,mntid_slock);
195 decl_simple_lock_data(,vnode_free_list_slock);
196 decl_simple_lock_data(,spechash_slock);
197
198 /*
199 * vnodetarget is the number of vnodes we expect to get back
200 * from the inactive vnode list and VM object cache.
201 * Since vnreclaim() is mainly a CPU-bound operation, this number
202 * could be higher on faster processors.
203 * Setting this number too high introduces longer delays in
204 * the execution of getnewvnode().
205 */
206 unsigned long vnodetarget; /* target for vnreclaim() */
207 #define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */
208
209 /*
210 * We need quite a few vnodes on the free list to sustain the rapid
211 * stat() traffic that a compilation process generates, and to still benefit
212 * from the name cache. Having too few vnodes on the free list causes
213 * serious disk thrashing as we cycle through them.
214 */
215 #define VNODE_FREE_MIN 300 /* freelist should have at least this many */
216
217 /*
218 * We need to get vnodes back from the VM object cache when a certain #
219 * of vnodes are reused from the freelist. This is essential for the
220 * caching to be effective in the namecache and the buffer cache [for the
221 * metadata].
222 */
223 #define VNODE_TOOMANY_REUSED (VNODE_FREE_MIN/4)
224
225 /*
226 * If we have enough vnodes on the freelist we do not want to reclaim
227 * the vnodes from the VM object cache.
228 */
229 #define VNODE_FREE_ENOUGH (VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
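/*
 * With the default VNODE_FREE_MIN of 300, the thresholds above work out
 * to: start reclaiming from the VM object cache once 75 vnodes have been
 * reused (VNODE_TOOMANY_REUSED), and skip that reclaim whenever at least
 * 450 vnodes are already free (VNODE_FREE_ENOUGH).
 */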
230
231 /*
232 * Initialize the vnode management data structures.
233 */
234 __private_extern__ void
235 vntblinit()
236 {
237 extern struct lock__bsd__ exchangelock;
238
239 simple_lock_init(&mountlist_slock);
240 simple_lock_init(&mntvnode_slock);
241 simple_lock_init(&mntid_slock);
242 simple_lock_init(&spechash_slock);
243 TAILQ_INIT(&vnode_free_list);
244 simple_lock_init(&vnode_free_list_slock);
245 TAILQ_INIT(&vnode_inactive_list);
246 CIRCLEQ_INIT(&mountlist);
247 lockinit(&exchangelock, PVFS, "exchange", 0, 0);
248
249 if (!vnodetarget)
250 vnodetarget = VNODE_FREE_TARGET;
251
252 /*
253 * Scale the vm_object_cache to accommodate the vnodes
254 * we want to cache
255 */
256 (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
257 }
258
259 /* Reset the VM Object Cache with the values passed in */
260 __private_extern__ kern_return_t
261 reset_vmobjectcache(unsigned int val1, unsigned int val2)
262 {
263 vm_size_t oval = val1 - VNODE_FREE_MIN;
264 vm_size_t nval;
265
266 if(val2 < VNODE_FREE_MIN)
267 nval = 0;
268 else
269 nval = val2 - VNODE_FREE_MIN;
270
271 return(adjust_vm_object_cache(oval, nval));
272 }
273
274 /*
275 * Mark a mount point as busy. Used to synchronize access and to delay
276 * unmounting. Interlock is not released on failure.
277 */
278 int
279 vfs_busy(mp, flags, interlkp, p)
280 struct mount *mp;
281 int flags;
282 struct slock *interlkp;
283 struct proc *p;
284 {
285 int lkflags;
286
287 if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
288 if (flags & LK_NOWAIT)
289 return (ENOENT);
290 mp->mnt_kern_flag |= MNTK_MWAIT;
291 if (interlkp)
292 simple_unlock(interlkp);
293 /*
294 * Since all busy locks are shared except the exclusive
295 * lock granted when unmounting, the only place that a
296 * wakeup needs to be done is at the release of the
297 * exclusive lock at the end of dounmount.
298 */
299 sleep((caddr_t)mp, PVFS);
300 if (interlkp)
301 simple_lock(interlkp);
302 return (ENOENT);
303 }
304 lkflags = LK_SHARED;
305 if (interlkp)
306 lkflags |= LK_INTERLOCK;
307 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
308 panic("vfs_busy: unexpected lock failure");
309 return (0);
310 }
311
312 /*
313 * Free a busy filesystem.
314 */
315 void
316 vfs_unbusy(mp, p)
317 struct mount *mp;
318 struct proc *p;
319 {
320
321 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
322 }
323
324 /*
325 * Lookup a filesystem type, and if found allocate and initialize
326 * a mount structure for it.
327 *
328 * Devname is usually updated by mount(8) after booting.
329 */
330 int
331 vfs_rootmountalloc(fstypename, devname, mpp)
332 char *fstypename;
333 char *devname;
334 struct mount **mpp;
335 {
336 struct proc *p = current_proc(); /* XXX */
337 struct vfsconf *vfsp;
338 struct mount *mp;
339
340 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
341 if (!strcmp(vfsp->vfc_name, fstypename))
342 break;
343 if (vfsp == NULL)
344 return (ENODEV);
345 mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
346 bzero((char *)mp, (u_long)sizeof(struct mount));
347
348 /* Initialize the default IO constraints */
349 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
350 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
351
352 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
353 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
354 LIST_INIT(&mp->mnt_vnodelist);
355 mp->mnt_vfc = vfsp;
356 mp->mnt_op = vfsp->vfc_vfsops;
357 mp->mnt_flag = MNT_RDONLY;
358 mp->mnt_vnodecovered = NULLVP;
359 vfsp->vfc_refcount++;
360 mp->mnt_stat.f_type = vfsp->vfc_typenum;
361 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
362 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
363 mp->mnt_stat.f_mntonname[0] = '/';
364 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
365 *mpp = mp;
366 return (0);
367 }
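/*
 * Illustrative (hypothetical) usage, typically from an fs-specific
 * mountroot routine; the names used here are examples only:
 *
 *	struct mount *mp;
 *	int error;
 *
 *	if ((error = vfs_rootmountalloc("hfs", rootdevname, &mp)))
 *		return (error);
 *	... have the filesystem mount mp, then link it onto mountlist
 *	    with CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list) ...
 */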
368
369 /*
370 * Find an appropriate filesystem to use for the root. If a filesystem
371 * has not been preselected, walk through the list of known filesystems
372 * trying those that have mountroot routines, and try them until one
373 * works or we have tried them all.
374 */
375 int
376 vfs_mountroot()
377 {
378 struct vfsconf *vfsp;
379 extern int (*mountroot)(void);
380 int error;
381
382 if (mountroot != NULL) {
383 error = (*mountroot)();
384 return (error);
385 }
386
387 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
388 if (vfsp->vfc_mountroot == NULL)
389 continue;
390 if ((error = (*vfsp->vfc_mountroot)()) == 0)
391 return (0);
392 if (error != EINVAL)
393 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
394 }
395 return (ENODEV);
396 }
397
398 /*
399 * Lookup a mount point by filesystem identifier.
400 */
401 struct mount *
402 vfs_getvfs(fsid)
403 fsid_t *fsid;
404 {
405 register struct mount *mp;
406
407 simple_lock(&mountlist_slock);
408 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
409 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
410 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
411 simple_unlock(&mountlist_slock);
412 return (mp);
413 }
414 }
415 simple_unlock(&mountlist_slock);
416 return ((struct mount *)0);
417 }
418
419 /*
420 * Get a new unique fsid
421 */
422 void
423 vfs_getnewfsid(mp)
424 struct mount *mp;
425 {
426 static u_short xxxfs_mntid;
427
428 fsid_t tfsid;
429 int mtype;
430
431 simple_lock(&mntid_slock);
432 mtype = mp->mnt_vfc->vfc_typenum;
433 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
434 mp->mnt_stat.f_fsid.val[1] = mtype;
435 if (xxxfs_mntid == 0)
436 ++xxxfs_mntid;
437 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
438 tfsid.val[1] = mtype;
439 if (!CIRCLEQ_EMPTY(&mountlist)) {
440 while (vfs_getvfs(&tfsid)) {
441 tfsid.val[0]++;
442 xxxfs_mntid++;
443 }
444 }
445 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
446 simple_unlock(&mntid_slock);
447 }
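/*
 * To summarize the scheme above: val[1] is just the filesystem type
 * number, and val[0] starts out as a pseudo device number built with
 * makedev(nblkdev + mtype, xxxfs_mntid); the loop then increments the
 * candidate until the (val[0], val[1]) pair does not belong to any
 * currently mounted filesystem.
 */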
448
449 /*
450 * Set vnode attributes to VNOVAL
451 */
452 void
453 vattr_null(vap)
454 register struct vattr *vap;
455 {
456
457 vap->va_type = VNON;
458 vap->va_size = vap->va_bytes = VNOVAL;
459 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
460 vap->va_fsid = vap->va_fileid =
461 vap->va_blocksize = vap->va_rdev =
462 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
463 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
464 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
465 vap->va_flags = vap->va_gen = VNOVAL;
466 vap->va_vaflags = 0;
467 }
468
469 /*
470 * Routines having to do with the management of the vnode table.
471 */
472 extern int (**dead_vnodeop_p)(void *);
473 static void vclean __P((struct vnode *vp, int flag, struct proc *p));
474 extern void vgonel __P((struct vnode *vp, struct proc *p));
475 long numvnodes, freevnodes;
476 long inactivevnodes;
477 long vnode_reclaim_tried;
478 long vnode_objects_reclaimed;
479
480
481 extern struct vattr va_null;
482
483 /*
484 * Return the next vnode from the free list.
485 */
486 int
487 getnewvnode(tag, mp, vops, vpp)
488 enum vtagtype tag;
489 struct mount *mp;
490 int (**vops)(void *);
491 struct vnode **vpp;
492 {
493 struct proc *p = current_proc(); /* XXX */
494 struct vnode *vp;
495 int cnt, didretry = 0;
496 static int reused = 0; /* track the reuse rate */
497 int reclaimhits = 0;
498
499 retry:
500 simple_lock(&vnode_free_list_slock);
501 /*
502 * MALLOC a vnode if the number of vnodes has not reached the desired
503 * value and the number on the free list is still reasonable;
504 * otherwise, reuse from the freelist (even though we may evict a name
505 * cache entry) to reduce the number of vnodes that accumulate... vnodes
506 * tie up wired memory and are never garbage collected.
507 */
508 if (numvnodes < desiredvnodes && (freevnodes < (2 * VNODE_FREE_MIN))) {
509 numvnodes++;
510 simple_unlock(&vnode_free_list_slock);
511 MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
512 bzero((char *)vp, sizeof *vp);
513 VLISTNONE(vp); /* avoid double queue removal */
514 simple_lock_init(&vp->v_interlock);
515 goto done;
516 }
517
518 /*
519 * Once the desired number of vnodes are allocated,
520 * we start reusing the vnodes.
521 */
522 if (freevnodes < VNODE_FREE_MIN) {
523 /*
524 * if we are low on vnodes on the freelist attempt to get
525 * some back from the inactive list and VM object cache
526 */
527 simple_unlock(&vnode_free_list_slock);
528 (void)vnreclaim(vnodetarget);
529 simple_lock(&vnode_free_list_slock);
530 }
531 if (numvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED) {
532 reused = 0;
533 if (freevnodes < VNODE_FREE_ENOUGH) {
534 simple_unlock(&vnode_free_list_slock);
535 (void)vnreclaim(vnodetarget);
536 simple_lock(&vnode_free_list_slock);
537 }
538 }
539
540 for (cnt = 0, vp = vnode_free_list.tqh_first;
541 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
542 if (simple_lock_try(&vp->v_interlock)) {
543 /* got the interlock */
544 if (ISSET(vp->v_flag, VORECLAIM)) {
545 /* skip over the vnodes that are being reclaimed */
546 simple_unlock(&vp->v_interlock);
547 reclaimhits++;
548 } else
549 break;
550 }
551 }
552
553 /*
554 * Unless this is a bad time of the month, at most
555 * the first NCPUS items on the free list are
556 * locked, so this is close enough to being empty.
557 */
558 if (vp == NULLVP) {
559 simple_unlock(&vnode_free_list_slock);
560 if (!(didretry++) && (vnreclaim(vnodetarget) > 0))
561 goto retry;
562 tablefull("vnode");
563 log(LOG_EMERG, "%d vnodes locked, %d desired, %d numvnodes, "
564 "%d free, %d inactive, %d being reclaimed\n",
565 cnt, desiredvnodes, numvnodes, freevnodes, inactivevnodes,
566 reclaimhits);
567 *vpp = 0;
568 return (ENFILE);
569 }
570
571 if (vp->v_usecount)
572 panic("free vnode isn't: v_type = %d, v_usecount = %d?",
573 vp->v_type, vp->v_usecount);
574
575 VREMFREE("getnewvnode", vp);
576 reused++;
577 simple_unlock(&vnode_free_list_slock);
578 vp->v_lease = NULL;
579 cache_purge(vp);
580 if (vp->v_type != VBAD)
581 vgonel(vp, p); /* clean and reclaim the vnode */
582 else
583 simple_unlock(&vp->v_interlock);
584 #if DIAGNOSTIC
585 if (vp->v_data)
586 panic("cleaned vnode isn't");
587 {
588 int s = splbio();
589 if (vp->v_numoutput)
590 panic("Clean vnode has pending I/O's");
591 splx(s);
592 }
593 #endif
594 if (UBCINFOEXISTS(vp))
595 panic("getnewvnode: ubcinfo not cleaned");
596 else
597 vp->v_ubcinfo = 0;
598
599 if (vp->v_flag & VHASDIRTY)
600 cluster_release(vp);
601
602 // make sure all these fields are cleared out as the
603 // name/parent stuff uses them and assumes they're
604 // cleared to null/0.
605 if (vp->v_scmap != NULL) {
606 panic("getnewvnode: vp @ 0x%x has non-null scmap.\n", vp);
607 }
608 vp->v_un.vu_name = NULL;
609 vp->v_scdirty = 0;
610 vp->v_un1.v_cl.v_pad = 0;
611
612
613 vp->v_lastr = -1;
614 vp->v_ralen = 0;
615 vp->v_maxra = 0;
616 vp->v_ciosiz = 0;
617 vp->v_clen = 0;
618 vp->v_socket = 0;
619
620 /* we may have blocked, re-evaluate state */
621 simple_lock(&vnode_free_list_slock);
622 if (VONLIST(vp)) {
623 if (vp->v_usecount == 0)
624 VREMFREE("getnewvnode", vp);
625 else if (ISSET((vp)->v_flag, VUINACTIVE))
626 VREMINACTIVE("getnewvnode", vp);
627 }
628 simple_unlock(&vnode_free_list_slock);
629
630 done:
631 vp->v_flag = VSTANDARD;
632 vp->v_type = VNON;
633 vp->v_tag = tag;
634 vp->v_op = vops;
635 insmntque(vp, mp);
636 *vpp = vp;
637 vp->v_usecount = 1;
638 vp->v_data = 0;
639 return (0);
640 }
641
642 /*
643 * Move a vnode from one mount queue to another.
644 */
645 void
646 insmntque(vp, mp)
647 struct vnode *vp;
648 struct mount *mp;
649 {
650
651 simple_lock(&mntvnode_slock);
652 /*
653 * Delete from old mount point vnode list, if on one.
654 */
655 if (vp->v_mount != NULL)
656 LIST_REMOVE(vp, v_mntvnodes);
657 /*
658 * Insert into list of vnodes for the new mount point, if available.
659 */
660 if ((vp->v_mount = mp) != NULL)
661 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
662 simple_unlock(&mntvnode_slock);
663 }
664
665 __inline void
666 vpwakeup(struct vnode *vp)
667 {
668 if (vp) {
669 if (--vp->v_numoutput < 0)
670 panic("vpwakeup: neg numoutput");
671 if ((vp->v_flag & VBWAIT || vp->v_flag & VTHROTTLED)
672 && vp->v_numoutput <= 0) {
673 vp->v_flag &= ~(VBWAIT|VTHROTTLED);
674 wakeup((caddr_t)&vp->v_numoutput);
675 }
676 }
677 }
678
679 /*
680 * Update outstanding I/O count and do wakeup if requested.
681 */
682 void
683 vwakeup(bp)
684 register struct buf *bp;
685 {
686 CLR(bp->b_flags, B_WRITEINPROG);
687 vpwakeup(bp->b_vp);
688 }
689
690 /*
691 * Flush out and invalidate all buffers associated with a vnode.
692 * Called with the underlying object locked.
693 */
694 int
695 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
696 register struct vnode *vp;
697 int flags;
698 struct ucred *cred;
699 struct proc *p;
700 int slpflag, slptimeo;
701 {
702 register struct buf *bp;
703 struct buf *nbp, *blist;
704 int s, error = 0;
705
706 if (flags & V_SAVE) {
707 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
708 return (error);
709 }
710 if (vp->v_dirtyblkhd.lh_first)
711 panic("vinvalbuf: dirty bufs (vp 0x%x, bp 0x%x)", vp, vp->v_dirtyblkhd.lh_first);
712 }
713
714 for (;;) {
715 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
716 while (blist && blist->b_lblkno < 0)
717 blist = blist->b_vnbufs.le_next;
718 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
719 (flags & V_SAVEMETA))
720 while (blist && blist->b_lblkno < 0)
721 blist = blist->b_vnbufs.le_next;
722 if (!blist)
723 break;
724
725 for (bp = blist; bp; bp = nbp) {
726 nbp = bp->b_vnbufs.le_next;
727 if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
728 continue;
729 s = splbio();
730 if (ISSET(bp->b_flags, B_BUSY)) {
731 SET(bp->b_flags, B_WANTED);
732 error = tsleep((caddr_t)bp,
733 slpflag | (PRIBIO + 1), "vinvalbuf",
734 slptimeo);
735 splx(s);
736 if (error) {
737 return (error);
738 }
739 break;
740 }
741 bremfree(bp);
742 SET(bp->b_flags, B_BUSY);
743 splx(s);
744 /*
745 * XXX Since there are no node locks for NFS, I believe
746 * there is a slight chance that a delayed write will
747 * occur while sleeping just above, so check for it.
748 */
749 if (ISSET(bp->b_flags, B_DELWRI) && (flags & V_SAVE)) {
750 (void) VOP_BWRITE(bp);
751 break;
752 }
753
754 if (bp->b_flags & B_LOCKED) {
755 panic("vinvalbuf: bp @ 0x%x is locked!", bp);
756 break;
757 } else {
758 SET(bp->b_flags, B_INVAL);
759 }
760 brelse(bp);
761 }
762 }
763 if (!(flags & V_SAVEMETA) &&
764 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
765 panic("vinvalbuf: flush failed");
766 return (0);
767 }
768
769 /*
770 * Create a vnode for a block device.
771 * Used for root filesystem, argdev, and swap areas.
772 * Also used for memory file system special devices.
773 */
774 int
775 bdevvp(dev, vpp)
776 dev_t dev;
777 struct vnode **vpp;
778 {
779 register struct vnode *vp;
780 struct vnode *nvp;
781 int error;
782
783 if (dev == NODEV) {
784 *vpp = NULLVP;
785 return (ENODEV);
786 }
787 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
788 if (error) {
789 *vpp = NULLVP;
790 return (error);
791 }
792 vp = nvp;
793 vp->v_type = VBLK;
794 if (nvp = checkalias(vp, dev, (struct mount *)0)) {
795 vput(vp);
796 vp = nvp;
797 }
798 *vpp = vp;
799 return (0);
800 }
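/*
 * For example (illustration only, not part of this file), the root
 * device's vnode is normally obtained early in boot with something like
 * bdevvp(rootdev, &rootvp) before the root filesystem is mounted.
 */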
801
802 /*
803 * Check to see if the new vnode represents a special device
804 * for which we already have a vnode (either because of
805 * bdevvp() or because of a different vnode representing
806 * the same block device). If such an alias exists, deallocate
807 * the existing contents and return the aliased vnode. The
808 * caller is responsible for filling it with its new contents.
809 */
810 struct vnode *
811 checkalias(nvp, nvp_rdev, mp)
812 register struct vnode *nvp;
813 dev_t nvp_rdev;
814 struct mount *mp;
815 {
816 struct proc *p = current_proc(); /* XXX */
817 struct vnode *vp;
818 struct vnode **vpp;
819 struct specinfo *specinfop;
820
821 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
822 return (NULLVP);
823
824 MALLOC_ZONE(specinfop, struct specinfo *, sizeof(struct specinfo),
825 M_SPECINFO, M_WAITOK);
826 vpp = &speclisth[SPECHASH(nvp_rdev)];
827 loop:
828 simple_lock(&spechash_slock);
829 for (vp = *vpp; vp; vp = vp->v_specnext) {
830 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
831 continue;
832 /*
833 * Alias, but not in use, so flush it out.
834 */
835 simple_lock(&vp->v_interlock);
836 if (vp->v_usecount == 0) {
837 simple_unlock(&spechash_slock);
838 vgonel(vp, p);
839 goto loop;
840 }
841 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
842 simple_unlock(&spechash_slock);
843 goto loop;
844 }
845 break;
846 }
847 if (vp == NULL || vp->v_tag != VT_NON) {
848 nvp->v_specinfo = specinfop;
849 specinfop = 0; /* buffer used */
850 bzero(nvp->v_specinfo, sizeof(struct specinfo));
851 nvp->v_rdev = nvp_rdev;
852 nvp->v_hashchain = vpp;
853 nvp->v_specnext = *vpp;
854 nvp->v_specflags = 0;
855 simple_unlock(&spechash_slock);
856 *vpp = nvp;
857 if (vp != NULLVP) {
858 nvp->v_flag |= VALIASED;
859 vp->v_flag |= VALIASED;
860 vput(vp);
861 }
862 /* Since buffer is used just return */
863 return (NULLVP);
864 }
865 simple_unlock(&spechash_slock);
866 VOP_UNLOCK(vp, 0, p);
867 simple_lock(&vp->v_interlock);
868 vclean(vp, 0, p);
869 vp->v_op = nvp->v_op;
870 vp->v_tag = nvp->v_tag;
871 nvp->v_type = VNON;
872 insmntque(vp, mp);
873 if (specinfop)
874 FREE_ZONE((void *)specinfop, sizeof(struct specinfo), M_SPECINFO);
875 return (vp);
876 }
877
878 /*
879 * Get a reference on a particular vnode and lock it if requested.
880 * If the vnode was on the inactive list, remove it from the list.
881 * If the vnode was on the free list, remove it from the list and
882 * move it to inactive list as needed.
883 * The vnode lock bit is set if the vnode is being eliminated in
884 * vgone. The process is awakened when the transition is completed,
885 * and an error returned to indicate that the vnode is no longer
886 * usable (possibly having been changed to a new file system type).
887 */
888 int
889 vget(vp, flags, p)
890 struct vnode *vp;
891 int flags;
892 struct proc *p;
893 {
894 int error = 0;
895 u_long vpid;
896
897 vpid = vp->v_id; // save off the original v_id
898
899 retry:
900
901 /*
902 * If the vnode is in the process of being cleaned out for
903 * another use, we wait for the cleaning to finish and then
904 * return failure. Cleaning is determined by checking that
905 * the VXLOCK flag is set.
906 */
907 if ((flags & LK_INTERLOCK) == 0)
908 simple_lock(&vp->v_interlock);
909 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
910 vp->v_flag |= VXWANT;
911 simple_unlock(&vp->v_interlock);
912 (void)tsleep((caddr_t)vp, PINOD, "vget", 0);
913 return (ENOENT);
914 }
915
916 /*
917 * vnode is being terminated.
918 * wait for vnode_pager_no_senders() to clear VTERMINATE
919 */
920 if (ISSET(vp->v_flag, VTERMINATE)) {
921 SET(vp->v_flag, VTERMWANT);
922 simple_unlock(&vp->v_interlock);
923 (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vget1", 0);
924 return (ENOENT);
925 }
926
927 /*
928 * if the vnode is being initialized,
929 * wait for it to finish initialization
930 */
931 if (ISSET(vp->v_flag, VUINIT)) {
932 SET(vp->v_flag, VUWANT);
933 simple_unlock(&vp->v_interlock);
934 (void) tsleep((caddr_t)vp, PINOD, "vget2", 0);
935 goto retry;
936 }
937
938 simple_lock(&vnode_free_list_slock);
939 if (VONLIST(vp)) {
940 if (vp->v_usecount == 0)
941 VREMFREE("vget", vp);
942 else if (ISSET((vp)->v_flag, VUINACTIVE))
943 VREMINACTIVE("vget", vp);
944 }
945 simple_unlock(&vnode_free_list_slock);
946
947 if (++vp->v_usecount <= 0)
948 panic("vget: v_usecount");
949
950 /*
951 * Recover named reference as needed
952 */
953 if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) {
954 simple_unlock(&vp->v_interlock);
955 if (ubc_getobject(vp, UBC_HOLDOBJECT) == MEMORY_OBJECT_CONTROL_NULL) {
956 error = ENOENT;
957 goto errout;
958 }
959 simple_lock(&vp->v_interlock);
960 }
961
962 if (flags & LK_TYPE_MASK) {
963 if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
964 goto errout;
965 if (vpid != vp->v_id) { // make sure it's still the same vnode
966 vput(vp);
967 return ENOENT;
968 }
969 return (0);
970 }
971
972 if ((flags & LK_INTERLOCK) == 0)
973 simple_unlock(&vp->v_interlock);
974
975 if (vpid != vp->v_id) { // make sure it's still the same vnode
976 vrele(vp);
977 return ENOENT;
978 }
979
980 return (0);
981
982 errout:
983 simple_lock(&vp->v_interlock);
984
985 /*
986 * we may have blocked. Re-evaluate the state
987 */
988 simple_lock(&vnode_free_list_slock);
989 if (VONLIST(vp)) {
990 if (vp->v_usecount == 0)
991 VREMFREE("vget", vp);
992 else if (ISSET((vp)->v_flag, VUINACTIVE))
993 VREMINACTIVE("vget", vp);
994 }
995 simple_unlock(&vnode_free_list_slock);
996
997 /*
998 * If the vnode was not active in the first place, we
999 * must not call vrele(), as VOP_INACTIVE() is not
1000 * required.
1001 * So the relevant part of vrele() is inlined here.
1002 */
1003 if (--vp->v_usecount == 1) {
1004 if (UBCINFOEXISTS(vp)) {
1005 vinactive(vp);
1006 simple_unlock(&vp->v_interlock);
1007 return (error);
1008 }
1009 }
1010 if (vp->v_usecount > 0) {
1011 simple_unlock(&vp->v_interlock);
1012 return (error);
1013 }
1014 if (vp->v_usecount < 0)
1015 panic("vget: negative usecount (%d)", vp->v_usecount);
1016 vfree(vp);
1017 simple_unlock(&vp->v_interlock);
1018 return (error);
1019 }
1020
1021 /*
1022 * Get a pager reference on the particular vnode.
1023 *
1024 * This is called from ubc_info_init() and it is assumed that
1025 * the vnode is not on the free list.
1026 * It is also assumed that the vnode is neither being recycled
1027 * by vgonel nor being terminated by vnode_pager_vrele().
1028 *
1029 * The vnode interlock is NOT held by the caller.
1030 */
1031 __private_extern__ int
1032 vnode_pager_vget(vp)
1033 struct vnode *vp;
1034 {
1035 simple_lock(&vp->v_interlock);
1036
1037 UBCINFOCHECK("vnode_pager_vget", vp);
1038
1039 if (ISSET(vp->v_flag, (VXLOCK|VORECLAIM|VTERMINATE)))
1040 panic("%s: dying vnode", "vnode_pager_vget");
1041
1042 simple_lock(&vnode_free_list_slock);
1043 /* The vnode should not be on free list */
1044 if (VONLIST(vp)) {
1045 if (vp->v_usecount == 0)
1046 panic("%s: still on list", "vnode_pager_vget");
1047 else if (ISSET((vp)->v_flag, VUINACTIVE))
1048 VREMINACTIVE("vnode_pager_vget", vp);
1049 }
1050
1051 /* The vnode should not be on the inactive list here */
1052 simple_unlock(&vnode_free_list_slock);
1053
1054 /* After all those checks, now do the real work :-) */
1055 if (++vp->v_usecount <= 0)
1056 panic("vnode_pager_vget: v_usecount");
1057 simple_unlock(&vp->v_interlock);
1058
1059 return (0);
1060 }
1061
1062 /*
1063 * Stubs to use when there is no locking to be done on the underlying object.
1064 * A minimal shared lock is necessary to ensure that the underlying object
1065 * is not revoked while an operation is in progress. So, an active shared
1066 * count is maintained in an auxiliary vnode lock structure.
1067 */
1068 int
1069 vop_nolock(ap)
1070 struct vop_lock_args /* {
1071 struct vnode *a_vp;
1072 int a_flags;
1073 struct proc *a_p;
1074 } */ *ap;
1075 {
1076 #ifdef notyet
1077 /*
1078 * This code cannot be used until all the non-locking filesystems
1079 * (notably NFS) are converted to properly lock and release nodes.
1080 * Also, certain vnode operations change the locking state within
1081 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
1082 * and symlink). Ideally these operations should not change the
1083 * lock state, but should be changed to let the caller of the
1084 * function unlock them. Otherwise all intermediate vnode layers
1085 * (such as union, umapfs, etc) must catch these functions to do
1086 * the necessary locking at their layer. Note that the inactive
1087 * and lookup operations also change their lock state, but this
1088 * cannot be avoided, so these two operations will always need
1089 * to be handled in intermediate layers.
1090 */
1091 struct vnode *vp = ap->a_vp;
1092 int vnflags, flags = ap->a_flags;
1093
1094 if (vp->v_vnlock == NULL) {
1095 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1096 return (0);
1097 MALLOC(vp->v_vnlock, struct lock__bsd__ *,
1098 sizeof(struct lock__bsd__), M_TEMP, M_WAITOK);
1099 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1100 }
1101 switch (flags & LK_TYPE_MASK) {
1102 case LK_DRAIN:
1103 vnflags = LK_DRAIN;
1104 break;
1105 case LK_EXCLUSIVE:
1106 case LK_SHARED:
1107 vnflags = LK_SHARED;
1108 break;
1109 case LK_UPGRADE:
1110 case LK_EXCLUPGRADE:
1111 case LK_DOWNGRADE:
1112 return (0);
1113 case LK_RELEASE:
1114 default:
1115 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
1116 }
1117 if (flags & LK_INTERLOCK)
1118 vnflags |= LK_INTERLOCK;
1119 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
1120 #else /* for now */
1121 /*
1122 * Since we are not using the lock manager, we must clear
1123 * the interlock here.
1124 */
1125 if (ap->a_flags & LK_INTERLOCK)
1126 simple_unlock(&ap->a_vp->v_interlock);
1127 return (0);
1128 #endif
1129 }
1130
1131 /*
1132 * Decrement the active use count.
1133 */
1134 int
1135 vop_nounlock(ap)
1136 struct vop_unlock_args /* {
1137 struct vnode *a_vp;
1138 int a_flags;
1139 struct proc *a_p;
1140 } */ *ap;
1141 {
1142 struct vnode *vp = ap->a_vp;
1143
1144 if (vp->v_vnlock == NULL)
1145 return (0);
1146 return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
1147 }
1148
1149 /*
1150 * Return whether or not the node is locked.
1151 */
1152 int
1153 vop_noislocked(ap)
1154 struct vop_islocked_args /* {
1155 struct vnode *a_vp;
1156 } */ *ap;
1157 {
1158 struct vnode *vp = ap->a_vp;
1159
1160 if (vp->v_vnlock == NULL)
1161 return (0);
1162 return (lockstatus(vp->v_vnlock));
1163 }
1164
1165 /*
1166 * Vnode reference.
1167 */
1168 void
1169 vref(vp)
1170 struct vnode *vp;
1171 {
1172
1173 simple_lock(&vp->v_interlock);
1174 if (vp->v_usecount <= 0)
1175 panic("vref used where vget required");
1176
1177 /* If on the inactive list, remove it from there */
1178 simple_lock(&vnode_free_list_slock);
1179 if (ISSET((vp)->v_flag, VUINACTIVE))
1180 VREMINACTIVE("vref", vp);
1181 simple_unlock(&vnode_free_list_slock);
1182
1183 if (++vp->v_usecount <= 0)
1184 panic("vref v_usecount");
1185 simple_unlock(&vp->v_interlock);
1186 }
1187
1188 static void
1189 clean_up_name_parent_ptrs(struct vnode *vp)
1190 {
1191 if (VNAME(vp) || VPARENT(vp)) {
1192 char *tmp1;
1193 struct vnode *tmp2;
1194
1195 // do it this way so we don't block before clearing
1196 // these fields.
1197 tmp1 = VNAME(vp);
1198 tmp2 = VPARENT(vp);
1199 VNAME(vp) = NULL;
1200 VPARENT(vp) = NULL;
1201
1202 if (tmp1) {
1203 remove_name(tmp1);
1204 }
1205
1206 if (tmp2) {
1207 vrele(tmp2);
1208 }
1209 }
1210 }
1211
1212
1213 /*
1214 * put the vnode on the appropriate free list.
1215 * called with v_interlock held.
1216 */
1217 static void
1218 vfree(vp)
1219 struct vnode *vp;
1220 {
1221 funnel_t *curflock;
1222 extern int disable_funnel;
1223
1224 if ((curflock = thread_funnel_get()) != kernel_flock &&
1225 !(disable_funnel && curflock != THR_FUNNEL_NULL))
1226 panic("Entering vfree() without kernel funnel");
1227
1228 /*
1229 * if the vnode is not obtained by calling getnewvnode() we
1230 * are not responsible for the cleanup. Just return.
1231 */
1232 if (!(vp->v_flag & VSTANDARD)) {
1233 return;
1234 }
1235
1236 if (vp->v_usecount != 0)
1237 panic("vfree: v_usecount");
1238
1239 /* insert at tail of LRU list or at head if VAGE is set */
1240 simple_lock(&vnode_free_list_slock);
1241
1242 // make sure the name & parent pointers get cleared out
1243 // clean_up_name_parent_ptrs(vp);
1244
1245 if (VONLIST(vp))
1246 panic("%s: vnode still on list", "vfree");
1247
1248 if (vp->v_flag & VAGE) {
1249 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1250 vp->v_flag &= ~VAGE;
1251 } else
1252 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1253 freevnodes++;
1254 simple_unlock(&vnode_free_list_slock);
1255 return;
1256 }
1257
1258 /*
1259 * put the vnode on the inactive list.
1260 * called with v_interlock held
1261 */
1262 static void
1263 vinactive(vp)
1264 struct vnode *vp;
1265 {
1266 funnel_t *curflock;
1267 extern int disable_funnel;
1268
1269 if ((curflock = thread_funnel_get()) != kernel_flock &&
1270 !(disable_funnel && curflock != THR_FUNNEL_NULL))
1271 panic("Entering vinactive() without kernel funnel");
1272
1273 if (!UBCINFOEXISTS(vp))
1274 panic("vinactive: not a UBC vnode");
1275
1276 if (vp->v_usecount != 1)
1277 panic("vinactive: v_usecount");
1278
1279 simple_lock(&vnode_free_list_slock);
1280
1281 if (VONLIST(vp))
1282 panic("%s: vnode still on list", "vinactive");
1283 VINACTIVECHECK("vinactive", vp, 0);
1284
1285 TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
1286 SET(vp->v_flag, VUINACTIVE);
1287 CLR(vp->v_flag, (VNOCACHE_DATA | VRAOFF));
1288
1289 inactivevnodes++;
1290 simple_unlock(&vnode_free_list_slock);
1291 return;
1292 }
1293
1294
1295 /*
1296 * vput(), just unlock and vrele()
1297 */
1298 void
1299 vput(vp)
1300 struct vnode *vp;
1301 {
1302 struct proc *p = current_proc(); /* XXX */
1303
1304 simple_lock(&vp->v_interlock);
1305 if (--vp->v_usecount == 1) {
1306 if (UBCINFOEXISTS(vp)) {
1307 vinactive(vp);
1308 simple_unlock(&vp->v_interlock);
1309 VOP_UNLOCK(vp, 0, p);
1310 return;
1311 }
1312 }
1313 if (vp->v_usecount > 0) {
1314 simple_unlock(&vp->v_interlock);
1315 VOP_UNLOCK(vp, 0, p);
1316 return;
1317 }
1318 #if DIAGNOSTIC
1319 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1320 vprint("vput: bad ref count", vp);
1321 panic("vput: v_usecount = %d, v_writecount = %d",
1322 vp->v_usecount, vp->v_writecount);
1323 }
1324 #endif
1325 simple_lock(&vnode_free_list_slock);
1326 if (ISSET((vp)->v_flag, VUINACTIVE))
1327 VREMINACTIVE("vput", vp);
1328 simple_unlock(&vnode_free_list_slock);
1329
1330 simple_unlock(&vp->v_interlock);
1331 VOP_INACTIVE(vp, p);
1332 /*
1333 * The interlock is not held and
1334 * VOP_INACTIVE releases the vnode lock.
1335 * We could block and the vnode might get reactivated,
1336 * so we cannot just call vfree() without checking the state.
1337 */
1338 simple_lock(&vp->v_interlock);
1339 if (!VONLIST(vp)) {
1340 if (vp->v_usecount == 0)
1341 vfree(vp);
1342 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1343 vinactive(vp);
1344 }
1345 simple_unlock(&vp->v_interlock);
1346 }
1347
1348 /*
1349 * Vnode release.
1350 * If count drops to zero, call inactive routine and return to freelist.
1351 */
1352 void
1353 vrele(vp)
1354 struct vnode *vp;
1355 {
1356 struct proc *p = current_proc(); /* XXX */
1357 funnel_t *curflock;
1358 extern int disable_funnel;
1359
1360 if ((curflock = thread_funnel_get()) != kernel_flock &&
1361 !(disable_funnel && curflock != THR_FUNNEL_NULL))
1362 panic("Entering vrele() without kernel funnel");
1363
1364 simple_lock(&vp->v_interlock);
1365 if (--vp->v_usecount == 1) {
1366 if (UBCINFOEXISTS(vp)) {
1367 if ((vp->v_flag & VXLOCK) == 0)
1368 vinactive(vp);
1369 simple_unlock(&vp->v_interlock);
1370 return;
1371 }
1372 }
1373 if (vp->v_usecount > 0) {
1374 simple_unlock(&vp->v_interlock);
1375 return;
1376 }
1377 #if DIAGNOSTIC
1378 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1379 vprint("vrele: bad ref count", vp);
1380 panic("vrele: ref cnt");
1381 }
1382 #endif
1383
1384 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
1385 /* vnode is being cleaned, just return */
1386 vfree(vp);
1387 simple_unlock(&vp->v_interlock);
1388 return;
1389 }
1390
1391 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1392 VOP_INACTIVE(vp, p);
1393 /*
1394 * vn_lock() releases the interlock and
1395 * VOP_INACTIVE releases the vnode lock.
1396 * We could block and the vnode might get reactivated,
1397 * so we cannot just call vfree() without checking the state.
1398 */
1399 simple_lock(&vp->v_interlock);
1400 if (!VONLIST(vp)) {
1401 if (vp->v_usecount == 0)
1402 vfree(vp);
1403 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1404 vinactive(vp);
1405 }
1406 simple_unlock(&vp->v_interlock);
1407 }
1408 #if 0
1409 else {
1410 vfree(vp);
1411 simple_unlock(&vp->v_interlock);
1412 kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp);
1413 }
1414 #endif
1415 }
1416
1417 void
1418 vagevp(vp)
1419 struct vnode *vp;
1420 {
1421 simple_lock(&vp->v_interlock);
1422 vp->v_flag |= VAGE;
1423 simple_unlock(&vp->v_interlock);
1424 return;
1425 }
1426
1427 /*
1428 * Page or buffer structure gets a reference.
1429 */
1430 void
1431 vhold(vp)
1432 register struct vnode *vp;
1433 {
1434
1435 simple_lock(&vp->v_interlock);
1436 vp->v_holdcnt++;
1437 simple_unlock(&vp->v_interlock);
1438 }
1439
1440 /*
1441 * Page or buffer structure frees a reference.
1442 */
1443 void
1444 holdrele(vp)
1445 register struct vnode *vp;
1446 {
1447
1448 simple_lock(&vp->v_interlock);
1449 if (vp->v_holdcnt <= 0)
1450 panic("holdrele: holdcnt");
1451 vp->v_holdcnt--;
1452 simple_unlock(&vp->v_interlock);
1453 }
1454
1455 /*
1456 * Remove any vnodes in the vnode table belonging to mount point mp.
1457 *
1458 * If MNT_NOFORCE is specified, there should not be any active ones,
1459 * return error if any are found (nb: this is a user error, not a
1460 * system error). If MNT_FORCE is specified, detach any active vnodes
1461 * that are found.
1462 */
1463 #if DIAGNOSTIC
1464 int busyprt = 0; /* print out busy vnodes */
1465 #if 0
1466 struct ctldebug debug1 = { "busyprt", &busyprt };
1467 #endif /* 0 */
1468 #endif
1469
1470 int
1471 vflush(mp, skipvp, flags)
1472 struct mount *mp;
1473 struct vnode *skipvp;
1474 int flags;
1475 {
1476 struct proc *p = current_proc();
1477 struct vnode *vp, *nvp;
1478 int busy = 0;
1479
1480 simple_lock(&mntvnode_slock);
1481 loop:
1482 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1483 if (vp->v_mount != mp)
1484 goto loop;
1485 nvp = vp->v_mntvnodes.le_next;
1486 /*
1487 * Skip over a selected vnode.
1488 */
1489 if (vp == skipvp)
1490 continue;
1491
1492 simple_lock(&vp->v_interlock);
1493 /*
1494 * Skip over vnodes marked VSYSTEM or VNOFLUSH.
1495 */
1496 if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) {
1497 simple_unlock(&vp->v_interlock);
1498 continue;
1499 }
1500 /*
1501 * Skip over vnodes marked VSWAP.
1502 */
1503 if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
1504 simple_unlock(&vp->v_interlock);
1505 continue;
1506 }
1507 /*
1508 * If WRITECLOSE is set, only flush out regular file
1509 * vnodes open for writing.
1510 */
1511 if ((flags & WRITECLOSE) &&
1512 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1513 simple_unlock(&vp->v_interlock);
1514 continue;
1515 }
1516 /*
1517 * With v_usecount == 0, all we need to do is clear
1518 * out the vnode data structures and we are done.
1519 */
1520 if (vp->v_usecount == 0) {
1521 simple_unlock(&mntvnode_slock);
1522 vgonel(vp, p);
1523 simple_lock(&mntvnode_slock);
1524 continue;
1525 }
1526 /*
1527 * If FORCECLOSE is set, forcibly close the vnode.
1528 * For block or character devices, revert to an
1529 * anonymous device. For all other files, just kill them.
1530 */
1531 if (flags & FORCECLOSE) {
1532 simple_unlock(&mntvnode_slock);
1533 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1534 vgonel(vp, p);
1535 } else {
1536 vclean(vp, 0, p);
1537 vp->v_op = spec_vnodeop_p;
1538 insmntque(vp, (struct mount *)0);
1539 }
1540 simple_lock(&mntvnode_slock);
1541 continue;
1542 }
1543 #if DIAGNOSTIC
1544 if (busyprt)
1545 vprint("vflush: busy vnode", vp);
1546 #endif
1547 simple_unlock(&vp->v_interlock);
1548 busy++;
1549 }
1550 simple_unlock(&mntvnode_slock);
1551 if (busy && ((flags & FORCECLOSE)==0))
1552 return (EBUSY);
1553 return (0);
1554 }
1555
1556 /*
1557 * Disassociate the underlying file system from a vnode.
1558 * The vnode interlock is held on entry.
1559 */
1560 static void
1561 vclean(vp, flags, p)
1562 struct vnode *vp;
1563 int flags;
1564 struct proc *p;
1565 {
1566 int active;
1567 int didhold;
1568
1569 /*
1570 * if the vnode is not obtained by calling getnewvnode() we
1571 * are not responsible for the cleanup. Just return.
1572 */
1573 if (!(vp->v_flag & VSTANDARD)) {
1574 simple_unlock(&vp->v_interlock);
1575 return;
1576 }
1577
1578 /*
1579 * Check to see if the vnode is in use.
1580 * If so we have to reference it before we clean it out
1581 * so that its count cannot fall to zero and generate a
1582 * race against ourselves to recycle it.
1583 */
1584 if (active = vp->v_usecount) {
1585 /*
1586 * An active vnode can not be on the free list.
1587 * We are about to take an extra reference on this vnode,
1588 * so do the queue management as needed.
1589 * Not doing so can cause a "still on list" or
1590 * "vnreclaim: v_usecount" panic if VOP_LOCK() blocks.
1591 */
1592 simple_lock(&vnode_free_list_slock);
1593 if (ISSET((vp)->v_flag, VUINACTIVE))
1594 VREMINACTIVE("vclean", vp);
1595 simple_unlock(&vnode_free_list_slock);
1596
1597 if (++vp->v_usecount <= 0)
1598 panic("vclean: v_usecount");
1599 }
1600
1601 /*
1602 * Prevent the vnode from being recycled or
1603 * brought into use while we clean it out.
1604 */
1605 if (vp->v_flag & VXLOCK)
1606 panic("vclean: deadlock");
1607 vp->v_flag |= VXLOCK;
1608
1609 /*
1610 * Even if the count is zero, the VOP_INACTIVE routine may still
1611 * have the object locked while it cleans it out. The VOP_LOCK
1612 * ensures that the VOP_INACTIVE routine is done with its work.
1613 * For active vnodes, it ensures that no other activity can
1614 * occur while the underlying object is being cleaned out.
1615 */
1616 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1617
1618 /*
1619 * While blocked in VOP_LOCK() someone could have dropped
1620 * reference[s] and we could land on the inactive list.
1621 * if this vnode is on the inactive list
1622 * take it off the list.
1623 */
1624 simple_lock(&vnode_free_list_slock);
1625 if (ISSET((vp)->v_flag, VUINACTIVE))
1626 VREMINACTIVE("vclean", vp);
1627 simple_unlock(&vnode_free_list_slock);
1628
1629 /* If the vnode was in active use, close it. */
1630 if (active && (flags & DOCLOSE))
1631 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1632
1633 /* Clean the pages in VM. */
1634 didhold = ubc_hold(vp);
1635 if ((active) && (didhold))
1636 (void)ubc_clean(vp, 0); /* do not invalidate */
1637
1638 /*
1639 * Clean out any buffers associated with the vnode.
1640 */
1641 if (flags & DOCLOSE) {
1642 if (vp->v_tag == VT_NFS)
1643 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
1644 else
1645 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1646 }
1647
1648 if (active)
1649 VOP_INACTIVE(vp, p);
1650 else
1651 VOP_UNLOCK(vp, 0, p);
1652
1653 /* Destroy ubc named reference */
1654 if (didhold) {
1655 ubc_rele(vp);
1656 ubc_destroy_named(vp);
1657 }
1658 /*
1659 * Make sure vp isn't on the inactive list.
1660 */
1661 simple_lock(&vnode_free_list_slock);
1662 if (ISSET((vp)->v_flag, VUINACTIVE)) {
1663 VREMINACTIVE("vclean", vp);
1664 }
1665 simple_unlock(&vnode_free_list_slock);
1666
1667 /*
1668 * Reclaim the vnode.
1669 */
1670 if (VOP_RECLAIM(vp, p))
1671 panic("vclean: cannot reclaim");
1672
1673 // make sure the name & parent ptrs get cleaned out!
1674 clean_up_name_parent_ptrs(vp);
1675
1676 cache_purge(vp);
1677 if (vp->v_vnlock) {
1678 struct lock__bsd__ *tmp = vp->v_vnlock;
1679 if ((tmp->lk_flags & LK_DRAINED) == 0)
1680 vprint("vclean: lock not drained", vp);
1681 vp->v_vnlock = NULL;
1682 FREE(tmp, M_TEMP);
1683 }
1684
1685 /* It's dead, Jim! */
1686 vp->v_op = dead_vnodeop_p;
1687 vp->v_tag = VT_NON;
1688
1689 insmntque(vp, (struct mount *)0);
1690
1691 /*
1692 * Done with purge, notify sleepers of the grim news.
1693 */
1694 vp->v_flag &= ~VXLOCK;
1695 if (vp->v_flag & VXWANT) {
1696 vp->v_flag &= ~VXWANT;
1697 wakeup((caddr_t)vp);
1698 }
1699
1700 if (active)
1701 vrele(vp);
1702 }
1703
1704 /*
1705 * Eliminate all activity associated with the requested vnode
1706 * and with all vnodes aliased to the requested vnode.
1707 */
1708 int
1709 vop_revoke(ap)
1710 struct vop_revoke_args /* {
1711 struct vnode *a_vp;
1712 int a_flags;
1713 } */ *ap;
1714 {
1715 struct vnode *vp, *vq;
1716 struct proc *p = current_proc();
1717
1718 #if DIAGNOSTIC
1719 if ((ap->a_flags & REVOKEALL) == 0)
1720 panic("vop_revoke");
1721 #endif
1722
1723 vp = ap->a_vp;
1724 simple_lock(&vp->v_interlock);
1725
1726 if (vp->v_flag & VALIASED) {
1727 /*
1728 * If a vgone (or vclean) is already in progress,
1729 * wait until it is done and return.
1730 */
1731 if (vp->v_flag & VXLOCK) {
1732 while (vp->v_flag & VXLOCK) {
1733 vp->v_flag |= VXWANT;
1734 simple_unlock(&vp->v_interlock);
1735 (void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1736 }
1737 return (0);
1738 }
1739 /*
1740 * Ensure that vp will not be vgone'd while we
1741 * are eliminating its aliases.
1742 */
1743 vp->v_flag |= VXLOCK;
1744 simple_unlock(&vp->v_interlock);
1745 while (vp->v_flag & VALIASED) {
1746 simple_lock(&spechash_slock);
1747 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1748 if (vq->v_rdev != vp->v_rdev ||
1749 vq->v_type != vp->v_type || vp == vq)
1750 continue;
1751 simple_unlock(&spechash_slock);
1752 vgone(vq);
1753 break;
1754 }
1755 if (vq == NULLVP)
1756 simple_unlock(&spechash_slock);
1757 }
1758 /*
1759 * Remove the lock so that vgone below will
1760 * really eliminate the vnode after which time
1761 * vgone will awaken any sleepers.
1762 */
1763 simple_lock(&vp->v_interlock);
1764 vp->v_flag &= ~VXLOCK;
1765 }
1766 vgonel(vp, p);
1767 return (0);
1768 }
1769
1770 /*
1771 * Recycle an unused vnode to the front of the free list.
1772 * Release the passed interlock if the vnode will be recycled.
1773 */
1774 int
1775 vrecycle(vp, inter_lkp, p)
1776 struct vnode *vp;
1777 struct slock *inter_lkp;
1778 struct proc *p;
1779 {
1780
1781 simple_lock(&vp->v_interlock);
1782 if (vp->v_usecount == 0) {
1783 if (inter_lkp)
1784 simple_unlock(inter_lkp);
1785 vgonel(vp, p);
1786 return (1);
1787 }
1788 simple_unlock(&vp->v_interlock);
1789 return (0);
1790 }
1791
1792 /*
1793 * Eliminate all activity associated with a vnode
1794 * in preparation for reuse.
1795 */
1796 void
1797 vgone(vp)
1798 struct vnode *vp;
1799 {
1800 struct proc *p = current_proc();
1801
1802 simple_lock(&vp->v_interlock);
1803 vgonel(vp, p);
1804 }
1805
1806 /*
1807 * vgone, with the vp interlock held.
1808 */
1809 void
1810 vgonel(vp, p)
1811 struct vnode *vp;
1812 struct proc *p;
1813 {
1814 struct vnode *vq;
1815 struct vnode *vx;
1816
1817 /*
1818 * if the vnode is not obtained by calling getnewvnode() we
1819 * are not responsible for the cleanup. Just return.
1820 */
1821 if (!(vp->v_flag & VSTANDARD)) {
1822 simple_unlock(&vp->v_interlock);
1823 return;
1824 }
1825
1826 /*
1827 * If a vgone (or vclean) is already in progress,
1828 * wait until it is done and return.
1829 */
1830 if (vp->v_flag & VXLOCK) {
1831 while (vp->v_flag & VXLOCK) {
1832 vp->v_flag |= VXWANT;
1833 simple_unlock(&vp->v_interlock);
1834 (void)tsleep((caddr_t)vp, PINOD, "vgone", 0);
1835 }
1836 return;
1837 }
1838 /*
1839 * Clean out the filesystem specific data.
1840 */
1841 vclean(vp, DOCLOSE, p);
1842 /*
1843 * Delete from old mount point vnode list, if on one.
1844 */
1845 if (vp->v_mount != NULL)
1846 insmntque(vp, (struct mount *)0);
1847 /*
1848 * If special device, remove it from special device alias list
1849 * if it is on one.
1850 */
1851 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1852 simple_lock(&spechash_slock);
1853 if (*vp->v_hashchain == vp) {
1854 *vp->v_hashchain = vp->v_specnext;
1855 } else {
1856 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1857 if (vq->v_specnext != vp)
1858 continue;
1859 vq->v_specnext = vp->v_specnext;
1860 break;
1861 }
1862 if (vq == NULL)
1863 panic("missing bdev");
1864 }
1865 if (vp->v_flag & VALIASED) {
1866 vx = NULL;
1867 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1868 if (vq->v_rdev != vp->v_rdev ||
1869 vq->v_type != vp->v_type)
1870 continue;
1871 if (vx)
1872 break;
1873 vx = vq;
1874 }
1875 if (vx == NULL)
1876 panic("missing alias");
1877 if (vq == NULL)
1878 vx->v_flag &= ~VALIASED;
1879 vp->v_flag &= ~VALIASED;
1880 }
1881 simple_unlock(&spechash_slock);
1882 {
1883 struct specinfo *tmp = vp->v_specinfo;
1884 vp->v_specinfo = NULL;
1885 FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO);
1886 }
1887 }
1888 /*
1889 * If it is on the freelist and not already at the head,
1890 * move it to the head of the list. The test of the back
1891 * pointer and the reference count of zero is because
1892 * it will be removed from the free list by getnewvnode,
1893 * but will not have its reference count incremented until
1894 * after calling vgone. If the reference count were
1895 * incremented first, vgone would (incorrectly) try to
1896 * close the previous instance of the underlying object.
1897 * So, the back pointer is explicitly set to `0xdeadb' in
1898 * getnewvnode after removing it from the freelist to ensure
1899 * that we do not try to move it here.
1900 */
1901 if (vp->v_usecount == 0 && (vp->v_flag & VUINACTIVE) == 0) {
1902 simple_lock(&vnode_free_list_slock);
1903 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1904 vnode_free_list.tqh_first != vp) {
1905 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1906 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1907 }
1908 simple_unlock(&vnode_free_list_slock);
1909 }
1910 vp->v_type = VBAD;
1911 }
1912
1913 /*
1914 * Lookup a vnode by device number.
1915 */
1916 int
1917 vfinddev(dev, type, vpp)
1918 dev_t dev;
1919 enum vtype type;
1920 struct vnode **vpp;
1921 {
1922 struct vnode *vp;
1923 int rc = 0;
1924
1925 simple_lock(&spechash_slock);
1926 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1927 if (dev != vp->v_rdev || type != vp->v_type)
1928 continue;
1929 *vpp = vp;
1930 rc = 1;
1931 break;
1932 }
1933 simple_unlock(&spechash_slock);
1934 return (rc);
1935 }
1936
1937 /*
1938 * Calculate the total number of references to a special device.
1939 */
1940 int
1941 vcount(vp)
1942 struct vnode *vp;
1943 {
1944 struct vnode *vq, *vnext;
1945 int count;
1946
1947 loop:
1948 if ((vp->v_flag & VALIASED) == 0)
1949 return (vp->v_usecount);
1950 simple_lock(&spechash_slock);
1951 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1952 vnext = vq->v_specnext;
1953 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1954 continue;
1955 /*
1956 * Alias, but not in use, so flush it out.
1957 */
1958 if (vq->v_usecount == 0 && vq != vp) {
1959 simple_unlock(&spechash_slock);
1960 vgone(vq);
1961 goto loop;
1962 }
1963 count += vq->v_usecount;
1964 }
1965 simple_unlock(&spechash_slock);
1966 return (count);
1967 }
1968
1969 int prtactive = 0; /* 1 => print out reclaim of active vnodes */
1970
1971 /*
1972 * Print out a description of a vnode.
1973 */
1974 static char *typename[] =
1975 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1976
1977 void
1978 vprint(label, vp)
1979 char *label;
1980 register struct vnode *vp;
1981 {
1982 char buf[64];
1983
1984 if (label != NULL)
1985 printf("%s: ", label);
1986 printf("type %s, usecount %d, writecount %d, refcount %d,",
1987 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1988 vp->v_holdcnt);
1989 buf[0] = '\0';
1990 if (vp->v_flag & VROOT)
1991 strcat(buf, "|VROOT");
1992 if (vp->v_flag & VTEXT)
1993 strcat(buf, "|VTEXT");
1994 if (vp->v_flag & VSYSTEM)
1995 strcat(buf, "|VSYSTEM");
1996 if (vp->v_flag & VNOFLUSH)
1997 strcat(buf, "|VNOFLUSH");
1998 if (vp->v_flag & VXLOCK)
1999 strcat(buf, "|VXLOCK");
2000 if (vp->v_flag & VXWANT)
2001 strcat(buf, "|VXWANT");
2002 if (vp->v_flag & VBWAIT)
2003 strcat(buf, "|VBWAIT");
2004 if (vp->v_flag & VALIASED)
2005 strcat(buf, "|VALIASED");
2006 if (buf[0] != '\0')
2007 printf(" flags (%s)", &buf[1]);
2008 if (vp->v_data == NULL) {
2009 printf("\n");
2010 } else {
2011 printf("\n\t");
2012 VOP_PRINT(vp);
2013 }
2014 }
2015
2016 #ifdef DEBUG
2017 /*
2018 * List all of the locked vnodes in the system.
2019 * Called when debugging the kernel.
2020 */
2021 void
2022 printlockedvnodes()
2023 {
2024 struct proc *p = current_proc();
2025 struct mount *mp, *nmp;
2026 struct vnode *vp;
2027
2028 printf("Locked vnodes\n");
2029 simple_lock(&mountlist_slock);
2030 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2031 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2032 nmp = mp->mnt_list.cqe_next;
2033 continue;
2034 }
2035 for (vp = mp->mnt_vnodelist.lh_first;
2036 vp != NULL;
2037 vp = vp->v_mntvnodes.le_next) {
2038 if (VOP_ISLOCKED(vp))
2039 vprint((char *)0, vp);
2040 }
2041 simple_lock(&mountlist_slock);
2042 nmp = mp->mnt_list.cqe_next;
2043 vfs_unbusy(mp, p);
2044 }
2045 simple_unlock(&mountlist_slock);
2046 }
2047 #endif
2048
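/*
 * Construct the path to a vnode by walking the name cache information
 * (VNAME/VPARENT) back toward the root, copying each component into
 * 'buff' from the tail end forward and switching to the covered vnode
 * whenever a mount point root is crossed.  The number of bytes used is
 * returned through '*outlen'.  Returns EINVAL if a component's name is
 * missing from the cache (but a parent exists), and ENOSPC if the
 * buffer is too small.
 */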
2049 static int
2050 build_path(struct vnode *vp, char *buff, int buflen, int *outlen)
2051 {
2052 char *end, *str;
2053 int i, len, ret=0, counter=0;
2054
2055 end = &buff[buflen-1];
2056 *--end = '\0';
2057
2058 while(vp && VPARENT(vp) != vp) {
2059 // the maximum depth of a file system hierarchy is MAXPATHLEN/2
2060 // (with single-char names separated by slashes). we panic if
2061 // we've ever looped more than that.
2062 if (counter++ > MAXPATHLEN/2) {
2063 panic("build_path: vnode parent chain is too long! vp 0x%x\n", vp);
2064 }
2065 str = VNAME(vp);
2066 if (VNAME(vp) == NULL) {
2067 if (VPARENT(vp) != NULL) {
2068 ret = EINVAL;
2069 }
2070 break;
2071 }
2072
2073 // count how long the string is
2074 for(len=0; *str; str++, len++)
2075 /* nothing */;
2076
2077 // check that there's enough space
2078 if ((end - buff) < len) {
2079 ret = ENOSPC;
2080 break;
2081 }
2082
2083 // copy it backwards
2084 for(; len > 0; len--) {
2085 *--end = *--str;
2086 }
2087
2088 // put in the path separator
2089 *--end = '/';
2090
2091 // walk up the chain.
2092 vp = VPARENT(vp);
2093
2094 // check if we're crossing a mount point and
2095 // switch the vp if we are.
2096 if (vp && (vp->v_flag & VROOT)) {
2097 vp = vp->v_mount->mnt_vnodecovered;
2098 }
2099 }
2100
2101 // slide it down to the beginning of the buffer
2102 memmove(buff, end, &buff[buflen] - end);
2103
2104 *outlen = &buff[buflen] - end;
2105
2106 return ret;
2107 }
2108
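/*
 * vn_getpath: exported wrapper around build_path().  On entry '*len'
 * holds the size of 'pathbuf'; on success it is updated to the number
 * of bytes of path data placed in the buffer.
 *
 * A minimal usage sketch (illustrative only, not a caller taken from
 * this file):
 *
 *	char	path[MAXPATHLEN];
 *	int	pathlen = sizeof(path);
 *
 *	if (vn_getpath(vp, path, &pathlen) == 0)
 *		printf("vnode path: %s\n", path);
 */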
2109 __private_extern__ int
2110 vn_getpath(struct vnode *vp, char *pathbuf, int *len)
2111 {
2112 return build_path(vp, pathbuf, *len, len);
2113 }
2114
2115
2116
2117 /*
2118 * Top level filesystem related information gathering.
2119 */
2120 int
2121 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
2122 int *name;
2123 u_int namelen;
2124 void *oldp;
2125 size_t *oldlenp;
2126 void *newp;
2127 size_t newlen;
2128 struct proc *p;
2129 {
2130 struct vfsconf *vfsp;
2131 int *username;
2132 u_int usernamelen;
2133 int error;
2134
2135 /*
2136 * The VFS_NUMMNTOPS shouldn't be at name[0] since it
2137 * is a VFS generic variable. So now we must check
2138 * namelen so we don't end up covering any UFS
2139 * variables (since UFS vfc_typenum is 1).
2140 *
2141 * It should have been:
2142 * name[0]: VFS_GENERIC
2143 * name[1]: VFS_NUMMNTOPS
2144 */
2145 if (namelen == 1 && name[0] == VFS_NUMMNTOPS) {
2146 extern unsigned int vfs_nummntops;
2147 return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
2148 }
2149
2150 /* all sysctl names at this level are at least name and field */
2151 if (namelen < 2)
2152 return (EISDIR); /* overloaded */
2153 if (name[0] != VFS_GENERIC) {
2154 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2155 if (vfsp->vfc_typenum == name[0])
2156 break;
2157 if (vfsp == NULL)
2158 return (EOPNOTSUPP);
2159 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
2160 oldp, oldlenp, newp, newlen, p));
2161 }
2162 switch (name[1]) {
2163 case VFS_MAXTYPENUM:
2164 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
2165 case VFS_CONF:
2166 if (namelen < 3)
2167 return (ENOTDIR); /* overloaded */
2168 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2169 if (vfsp->vfc_typenum == name[2])
2170 break;
2171 if (vfsp == NULL)
2172 return (EOPNOTSUPP);
2173 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
2174 sizeof(struct vfsconf)));
2175 }
2176 /*
2177 * We need to get back into the general MIB, so we need to re-prepend
2178 * CTL_VFS to our name and try userland_sysctl().
2179 */
2180 usernamelen = namelen + 1;
2181 MALLOC(username, int *, usernamelen * sizeof(*username),
2182 M_TEMP, M_WAITOK);
2183 bcopy(name, username + 1, namelen * sizeof(*name));
2184 username[0] = CTL_VFS;
2185 error = userland_sysctl(p, username, usernamelen, oldp, oldlenp, 1,
2186 newp, newlen, oldlenp);
2187 FREE(username, M_TEMP);
2188 return (error);
2189 }
2190
2191 int kinfo_vdebug = 1;
2192 #define KINFO_VNODESLOP 10
2193 /*
2194 * Dump vnode list (via sysctl).
2195 * Copyout address of vnode followed by vnode.
2196 */
2197 /* ARGSUSED */
2198 int
2199 sysctl_vnode(where, sizep, p)
2200 char *where;
2201 size_t *sizep;
2202 struct proc *p;
2203 {
2204 struct mount *mp, *nmp;
2205 struct vnode *nvp, *vp;
2206 char *bp = where, *savebp;
2207 char *ewhere;
2208 int error;
2209
2210 #define VPTRSZ sizeof (struct vnode *)
2211 #define VNODESZ sizeof (struct vnode)
2212 if (where == NULL) {
2213 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2214 return (0);
2215 }
2216 ewhere = where + *sizep;
2217
2218 simple_lock(&mountlist_slock);
2219 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2220 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2221 nmp = mp->mnt_list.cqe_next;
2222 continue;
2223 }
2224 savebp = bp;
2225 again:
2226 simple_lock(&mntvnode_slock);
2227 for (vp = mp->mnt_vnodelist.lh_first;
2228 vp != NULL;
2229 vp = nvp) {
2230 /*
2231 * Check that the vp is still associated with
2232 * this filesystem. RACE: could have been
2233 * recycled onto the same filesystem.
2234 */
2235 if (vp->v_mount != mp) {
2236 simple_unlock(&mntvnode_slock);
2237 if (kinfo_vdebug)
2238 printf("kinfo: vp changed\n");
2239 bp = savebp;
2240 goto again;
2241 }
2242 nvp = vp->v_mntvnodes.le_next;
2243 if (bp + VPTRSZ + VNODESZ > ewhere) {
2244 simple_unlock(&mntvnode_slock);
2245 vfs_unbusy(mp, p);
2246 *sizep = bp - where;
2247 return (ENOMEM);
2248 }
2249 simple_unlock(&mntvnode_slock);
2250 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2251 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) {
2252 vfs_unbusy(mp, p);
2253 return (error);
2254 }
2255 bp += VPTRSZ + VNODESZ;
2256 simple_lock(&mntvnode_slock);
2257 }
2258 simple_unlock(&mntvnode_slock);
2259 simple_lock(&mountlist_slock);
2260 nmp = mp->mnt_list.cqe_next;
2261 vfs_unbusy(mp, p);
2262 }
2263 simple_unlock(&mountlist_slock);
2264
2265 *sizep = bp - where;
2266 return (0);
2267 }
2268
2269 /*
2270 * Check to see if a filesystem is mounted on a block device.
2271 */
2272 int
2273 vfs_mountedon(vp)
2274 struct vnode *vp;
2275 {
2276 struct vnode *vq;
2277 int error = 0;
2278
2279 if (vp->v_specflags & SI_MOUNTEDON)
2280 return (EBUSY);
2281 if (vp->v_flag & VALIASED) {
2282 simple_lock(&spechash_slock);
2283 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2284 if (vq->v_rdev != vp->v_rdev ||
2285 vq->v_type != vp->v_type)
2286 continue;
2287 if (vq->v_specflags & SI_MOUNTEDON) {
2288 error = EBUSY;
2289 break;
2290 }
2291 }
2292 simple_unlock(&spechash_slock);
2293 }
2294 return (error);
2295 }
2296
2297 /*
2298 * Unmount all filesystems. The list is traversed in reverse order
2299 * of mounting to avoid dependencies.
2300 */
2301 __private_extern__ void
2302 vfs_unmountall()
2303 {
2304 struct mount *mp, *nmp;
2305 struct proc *p = current_proc();
2306
2307 /*
2308 * Since this only runs when rebooting, it is not interlocked.
2309 */
2310 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2311 nmp = mp->mnt_list.cqe_prev;
2312 (void) dounmount(mp, MNT_FORCE, p);
2313 }
2314 }
2315
2316 /*
2317 * Build hash lists of net addresses and hang them off the mount point.
2318 * Called by vfs_export() to set up the lists of export addresses.
2319 */
2320 static int
2321 vfs_hang_addrlist(mp, nep, argp)
2322 struct mount *mp;
2323 struct netexport *nep;
2324 struct export_args *argp;
2325 {
2326 register struct netcred *np;
2327 register struct radix_node_head *rnh;
2328 register int i;
2329 struct radix_node *rn;
2330 struct sockaddr *saddr, *smask = 0;
2331 struct domain *dom;
2332 int error;
2333
2334 if (argp->ex_addrlen == 0) {
2335 if (mp->mnt_flag & MNT_DEFEXPORTED)
2336 return (EPERM);
2337 np = &nep->ne_defexported;
2338 np->netc_exflags = argp->ex_flags;
2339 np->netc_anon = argp->ex_anon;
2340 np->netc_anon.cr_ref = 1;
2341 mp->mnt_flag |= MNT_DEFEXPORTED;
2342 return (0);
2343 }
2344 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2345 MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK);
2346 bzero((caddr_t)np, i);
2347 saddr = (struct sockaddr *)(np + 1);
2348 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
2349 goto out;
2350 if (saddr->sa_len > argp->ex_addrlen)
2351 saddr->sa_len = argp->ex_addrlen;
2352 if (argp->ex_masklen) {
2353 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2354 error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
2355 if (error)
2356 goto out;
2357 if (smask->sa_len > argp->ex_masklen)
2358 smask->sa_len = argp->ex_masklen;
2359 }
2360 i = saddr->sa_family;
2361 if ((rnh = nep->ne_rtable[i]) == 0) {
2362 /*
2363 * It seems silly to initialize every AF when most are not
2364 * used, so do so on demand here.
2365 */
2366 for (dom = domains; dom; dom = dom->dom_next)
2367 if (dom->dom_family == i && dom->dom_rtattach) {
2368 dom->dom_rtattach((void **)&nep->ne_rtable[i],
2369 dom->dom_rtoffset);
2370 break;
2371 }
2372 if ((rnh = nep->ne_rtable[i]) == 0) {
2373 error = ENOBUFS;
2374 goto out;
2375 }
2376 }
2377 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
2378 np->netc_rnodes);
2379 if (rn == 0) {
2380 /*
2381 * One of the reasons that rnh_addaddr may fail is that
2382 * the entry already exists. To check for this case, we
2383 * look up the entry to see if it is there. If so, we
2384 * do not need to make a new entry but do return success.
2385 */
2386 _FREE(np, M_NETADDR);
2387 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
2388 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
2389 ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
2390 !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
2391 (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
2392 return (0);
2393 return (EPERM);
2394 }
2395 np->netc_exflags = argp->ex_flags;
2396 np->netc_anon = argp->ex_anon;
2397 np->netc_anon.cr_ref = 1;
2398 return (0);
2399 out:
2400 _FREE(np, M_NETADDR);
2401 return (error);
2402 }
2403
2404 /* ARGSUSED */
2405 static int
2406 vfs_free_netcred(rn, w)
2407 struct radix_node *rn;
2408 caddr_t w;
2409 {
2410 register struct radix_node_head *rnh = (struct radix_node_head *)w;
2411
2412 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2413 _FREE((caddr_t)rn, M_NETADDR);
2414 return (0);
2415 }
2416
2417 /*
2418 * Free the net address hash lists that are hanging off the mount points.
2419 */
2420 static void
2421 vfs_free_addrlist(nep)
2422 struct netexport *nep;
2423 {
2424 register int i;
2425 register struct radix_node_head *rnh;
2426
2427 for (i = 0; i <= AF_MAX; i++)
2428 if (rnh = nep->ne_rtable[i]) {
2429 (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
2430 (caddr_t)rnh);
2431 _FREE((caddr_t)rnh, M_RTABLE);
2432 nep->ne_rtable[i] = 0;
2433 }
2434 }
2435
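/*
 * Update the export information for a mount point.  MNT_DELEXPORT in
 * the argument flags tears down any existing export address lists;
 * MNT_EXPORTED (re)builds them via vfs_hang_addrlist().
 */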
2436 int
2437 vfs_export(mp, nep, argp)
2438 struct mount *mp;
2439 struct netexport *nep;
2440 struct export_args *argp;
2441 {
2442 int error;
2443
2444 if (argp->ex_flags & MNT_DELEXPORT) {
2445 vfs_free_addrlist(nep);
2446 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2447 }
2448 if (argp->ex_flags & MNT_EXPORTED) {
2449 if (error = vfs_hang_addrlist(mp, nep, argp))
2450 return (error);
2451 mp->mnt_flag |= MNT_EXPORTED;
2452 }
2453 return (0);
2454 }
2455
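/*
 * Look up the export credentials that apply to the client address in
 * 'nam'.  If no specific address matches, fall back to the default
 * export entry (if MNT_DEFEXPORTED is set); returns NULL when the
 * file system is not exported to the caller.
 */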
2456 struct netcred *
2457 vfs_export_lookup(mp, nep, nam)
2458 register struct mount *mp;
2459 struct netexport *nep;
2460 struct mbuf *nam;
2461 {
2462 register struct netcred *np;
2463 register struct radix_node_head *rnh;
2464 struct sockaddr *saddr;
2465
2466 np = NULL;
2467 if (mp->mnt_flag & MNT_EXPORTED) {
2468 /*
2469 * Lookup in the export list first.
2470 */
2471 if (nam != NULL) {
2472 saddr = mtod(nam, struct sockaddr *);
2473 rnh = nep->ne_rtable[saddr->sa_family];
2474 if (rnh != NULL) {
2475 np = (struct netcred *)
2476 (*rnh->rnh_matchaddr)((caddr_t)saddr,
2477 rnh);
2478 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2479 np = NULL;
2480 }
2481 }
2482 /*
2483 * If no address match, use the default if it exists.
2484 */
2485 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2486 np = &nep->ne_defexported;
2487 }
2488 return (np);
2489 }
2490
2491 /*
2492 * try to reclaim vnodes from the memory
2493 * object cache
2494 */
2495 static int
2496 vm_object_cache_reclaim(int count)
2497 {
2498 int cnt;
2499 void vnode_pager_release_from_cache(int *);
2500
2501 /* attempt to reclaim vnodes from VM object cache */
2502 cnt = count;
2503 vnode_pager_release_from_cache(&cnt);
2504 return(cnt);
2505 }
2506
2507 /*
2508 * Release memory object reference held by inactive vnodes
2509 * and then try to reclaim some vnodes from the memory
2510 * object cache
2511 */
2512 static int
2513 vnreclaim(int count)
2514 {
2515 int i, loopcnt;
2516 struct vnode *vp;
2517 int err;
2518 struct proc *p;
2519
2520 i = 0;
2521 loopcnt = 0;
2522
2523 /* Try to release "count" vnodes from the inactive list */
2524 restart:
2525 if (++loopcnt > inactivevnodes) {
2526 /*
2527 * I did my best trying to reclaim the vnodes.
2528 * Do not try any more as that would only lead to
2529 * long latencies. Also in the worst case
2530 * this can get totally CPU bound.
2531 * Just fall through and attempt a reclaim of the VM
2532 * object cache.
2533 */
2534 goto out;
2535 }
2536
2537 simple_lock(&vnode_free_list_slock);
2538 for (vp = TAILQ_FIRST(&vnode_inactive_list);
2539 (vp != NULLVP) && (i < count);
2540 vp = TAILQ_NEXT(vp, v_freelist)) {
2541
2542 if (!simple_lock_try(&vp->v_interlock))
2543 continue;
2544
2545 if (vp->v_usecount != 1)
2546 panic("vnreclaim: v_usecount");
2547
2548 if(!UBCINFOEXISTS(vp)) {
2549 if (vp->v_type == VBAD) {
2550 VREMINACTIVE("vnreclaim", vp);
2551 simple_unlock(&vp->v_interlock);
2552 continue;
2553 } else
2554 panic("non UBC vnode on inactive list");
2555 /* Should not reach here */
2556 }
2557
2558 /* If vnode is already being reclaimed, wait */
2559 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
2560 vp->v_flag |= VXWANT;
2561 simple_unlock(&vp->v_interlock);
2562 simple_unlock(&vnode_free_list_slock);
2563 (void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
2564 goto restart;
2565 }
2566
2567 /*
2568 * if the vnode is being initialized,
2569 * skip over it
2570 */
2571 if (ISSET(vp->v_flag, VUINIT)) {
2572 SET(vp->v_flag, VUWANT);
2573 simple_unlock(&vp->v_interlock);
2574 continue;
2575 }
2576
2577 VREMINACTIVE("vnreclaim", vp);
2578 simple_unlock(&vnode_free_list_slock);
2579
2580 if (ubc_issetflags(vp, UI_WASMAPPED)) {
2581 /*
2582 * We should not reclaim as it is likely
2583 * to be in use. Let it die a natural death.
2584 * Release the UBC reference if one exists
2585 * and put it back at the tail.
2586 */
2587 simple_unlock(&vp->v_interlock);
2588 if (ubc_release_named(vp)) {
2589 if (UBCINFOEXISTS(vp)) {
2590 simple_lock(&vp->v_interlock);
2591 if (vp->v_usecount == 1 && !VONLIST(vp))
2592 vinactive(vp);
2593 simple_unlock(&vp->v_interlock);
2594 }
2595 } else {
2596 simple_lock(&vp->v_interlock);
2597 vinactive(vp);
2598 simple_unlock(&vp->v_interlock);
2599 }
2600 } else {
2601 int didhold;
2602
2603 VORECLAIM_ENABLE(vp);
2604
2605 /*
2606 * scrub the dirty pages and invalidate the buffers
2607 */
2608 p = current_proc();
2609 err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
2610 if (err) {
2611 /* cannot reclaim */
2612 simple_lock(&vp->v_interlock);
2613 vinactive(vp);
2614 VORECLAIM_DISABLE(vp);
2615 i++;
2616 simple_unlock(&vp->v_interlock);
2617 goto restart;
2618 }
2619
2620 /* keep the vnode alive so we can kill it */
2621 simple_lock(&vp->v_interlock);
2622 if(vp->v_usecount != 1)
2623 panic("VOCR: usecount race");
2624 vp->v_usecount++;
2625 simple_unlock(&vp->v_interlock);
2626
2627 /* clean up the state in VM without invalidating */
2628 didhold = ubc_hold(vp);
2629 if (didhold)
2630 (void)ubc_clean(vp, 0);
2631
2632 /* flush and invalidate buffers associated with the vnode */
2633 if (vp->v_tag == VT_NFS)
2634 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
2635 else
2636 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
2637
2638 /*
2639 * Note: for the v_usecount == 2 case, VOP_INACTIVE
2640 * has not yet been called. Call it now while vp is
2641 * still locked, it will also release the lock.
2642 */
2643 if (vp->v_usecount == 2)
2644 VOP_INACTIVE(vp, p);
2645 else
2646 VOP_UNLOCK(vp, 0, p);
2647
2648 if (didhold)
2649 ubc_rele(vp);
2650
2651 /*
2652 * destroy the ubc named reference.
2653 * If we can't because it is held for I/Os
2654 * in progress, just put it back on the inactive
2655 * list and move on. Otherwise, the paging reference
2656 * is toast (and so is this vnode?).
2657 */
2658 if (ubc_destroy_named(vp)) {
2659 i++;
2660 }
2661 simple_lock(&vp->v_interlock);
2662 VORECLAIM_DISABLE(vp);
2663 simple_unlock(&vp->v_interlock);
2664 vrele(vp); /* release extra use we added here */
2665 }
2666 /* inactive list lock was released, must restart */
2667 goto restart;
2668 }
2669 simple_unlock(&vnode_free_list_slock);
2670
2671 vnode_reclaim_tried += i;
2672 out:
2673 i = vm_object_cache_reclaim(count);
2674 vnode_objects_reclaimed += i;
2675
2676 return(i);
2677 }
2678
2679 /*
2680 * This routine is called from vnode_pager_no_senders()
2681 * which in turn can be called with the vnode locked by vnode_uncache(),
2682 * but it could also get called as a result of vm_object_cache_trim().
2683 * In that case lock state is unknown.
2684 * AGE the vnode so that it gets recycled quickly.
2685 * Check lock status to decide whether to call vput() or vrele().
2686 */
2687 __private_extern__ void
2688 vnode_pager_vrele(struct vnode *vp)
2689 {
2690
2691 boolean_t funnel_state;
2692 int isvnreclaim = 1;
2693
2694 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2695
2696 /* Mark the vnode to be recycled */
2697 vagevp(vp);
2698
2699 simple_lock(&vp->v_interlock);
2700 /*
2701 * If a vgone (or vclean) is already in progress,
2702 * do not bother with the ubc_info cleanup;
2703 * let the vclean deal with it.
2704 */
2705 if (vp->v_flag & VXLOCK) {
2706 CLR(vp->v_flag, VTERMINATE);
2707 if (ISSET(vp->v_flag, VTERMWANT)) {
2708 CLR(vp->v_flag, VTERMWANT);
2709 wakeup((caddr_t)&vp->v_ubcinfo);
2710 }
2711 simple_unlock(&vp->v_interlock);
2712 vrele(vp);
2713 (void) thread_funnel_set(kernel_flock, funnel_state);
2714 return;
2715 }
2716
2717 /* It's dead, Jim! */
2718 if (!ISSET(vp->v_flag, VORECLAIM)) {
2719 /*
2720 * called as a result of eviction of the memory
2721 * object from the memory object cache
2722 */
2723 isvnreclaim = 0;
2724
2725 /* So serialize vnode operations */
2726 VORECLAIM_ENABLE(vp);
2727 }
2728 if (!ISSET(vp->v_flag, VTERMINATE))
2729 SET(vp->v_flag, VTERMINATE);
2730
2731 cache_purge(vp);
2732
2733 if (UBCINFOEXISTS(vp)) {
2734 struct ubc_info *uip = vp->v_ubcinfo;
2735
2736 if (ubc_issetflags(vp, UI_WASMAPPED))
2737 SET(vp->v_flag, VWASMAPPED);
2738
2739 vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
2740 simple_unlock(&vp->v_interlock);
2741 ubc_info_deallocate(uip);
2742 } else {
2743 if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
2744 && ((vp)->v_ubcinfo != UBC_NOINFO)) {
2745 struct ubc_info *uip = vp->v_ubcinfo;
2746
2747 vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
2748 simple_unlock(&vp->v_interlock);
2749 ubc_info_deallocate(uip);
2750 } else {
2751 simple_unlock(&vp->v_interlock);
2752 }
2753 }
2754
2755 CLR(vp->v_flag, VTERMINATE);
2756
2757 if (vp->v_type != VBAD){
2758 vgone(vp); /* revoke the vnode */
2759 vrele(vp); /* and drop the reference */
2760 } else
2761 vrele(vp);
2762
2763 if (ISSET(vp->v_flag, VTERMWANT)) {
2764 CLR(vp->v_flag, VTERMWANT);
2765 wakeup((caddr_t)&vp->v_ubcinfo);
2766 }
2767 if (!isvnreclaim)
2768 VORECLAIM_DISABLE(vp);
2769 (void) thread_funnel_set(kernel_flock, funnel_state);
2770 return;
2771 }
2772
2773
2774 #if DIAGNOSTIC
2775 int walk_vnodes_debug=0;
2776
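/*
 * Debugging aid (DIAGNOSTIC only): walk every per-mount vnode list as
 * well as the free and inactive lists, optionally printing any vnode
 * whose v_usecount has gone negative, and report how many vnodes sit
 * on the free and inactive lists.
 */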
2777 void
2778 walk_allvnodes()
2779 {
2780 struct mount *mp, *nmp;
2781 struct vnode *vp;
2782 int cnt = 0;
2783
2784 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2785 for (vp = mp->mnt_vnodelist.lh_first;
2786 vp != NULL;
2787 vp = vp->v_mntvnodes.le_next) {
2788 if (vp->v_usecount < 0){
2789 if(walk_vnodes_debug) {
2790 printf("vp is %x\n",vp);
2791 }
2792 }
2793 }
2794 nmp = mp->mnt_list.cqe_next;
2795 }
2796 for (cnt = 0, vp = vnode_free_list.tqh_first;
2797 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
2798 if ((vp->v_usecount < 0) && walk_vnodes_debug) {
2799 if(walk_vnodes_debug) {
2800 printf("vp is %x\n",vp);
2801 }
2802 }
2803 }
2804 printf("%d - free\n", cnt);
2805
2806 for (cnt = 0, vp = vnode_inactive_list.tqh_first;
2807 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
2808 if ((vp->v_usecount < 0) && walk_vnodes_debug) {
2809 if(walk_vnodes_debug) {
2810 printf("vp is %x\n",vp);
2811 }
2812 }
2813 }
2814 printf("%d - inactive\n", cnt);
2815 }
2816 #endif /* DIAGNOSTIC */
2817
2818
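/*
 * Extended per-mount I/O constraints.  vfs_init_io_attributes() hangs
 * one of these off mp->mnt_xinfo_ptr and sets MNTK_IO_XINFO in
 * mnt_kern_flag so that vfs_io_attributes() and vfs_io_maxsegsize()
 * can find it.
 */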
2819 struct x_constraints {
2820 u_int32_t x_maxreadcnt;
2821 u_int32_t x_maxsegreadsize;
2822 u_int32_t x_maxsegwritesize;
2823 };
2824
2825
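/*
 * Report the maximum I/O transfer size and scatter/gather segment
 * count supported for reads (B_READ) or writes (B_WRITE) on the file
 * system backing 'vp', falling back to MAXPHYS and 32 segments when
 * the mount supplies no constraints.
 */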
2826 void
2827 vfs_io_attributes(vp, flags, iosize, vectors)
2828 struct vnode *vp;
2829 int flags; /* B_READ or B_WRITE */
2830 int *iosize;
2831 int *vectors;
2832 {
2833 struct mount *mp;
2834
2835 /* start with "reasonable" defaults */
2836 *iosize = MAXPHYS;
2837 *vectors = 32;
2838
2839 mp = vp->v_mount;
2840 if (mp != NULL) {
2841 switch (flags) {
2842 case B_READ:
2843 if (mp->mnt_kern_flag & MNTK_IO_XINFO)
2844 *iosize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt;
2845 else
2846 *iosize = mp->mnt_maxreadcnt;
2847 *vectors = mp->mnt_segreadcnt;
2848 break;
2849 case B_WRITE:
2850 *iosize = mp->mnt_maxwritecnt;
2851 *vectors = mp->mnt_segwritecnt;
2852 break;
2853 default:
2854 break;
2855 }
2856 if (*iosize == 0)
2857 *iosize = MAXPHYS;
2858 if (*vectors == 0)
2859 *vectors = 32;
2860 }
2861 return;
2862 }
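/*
 * Illustrative caller sketch (not taken from this file): I/O clustering
 * code would typically clamp its transfers with something like
 *
 *	int max_iosize, max_vectors;
 *
 *	vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);
 *	if (io_size > max_iosize)
 *		io_size = max_iosize;
 */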
2863
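/*
 * Report the maximum size of a single scatter/gather segment for reads
 * or writes on the file system backing 'vp'.  When the extended
 * x_constraints info is absent, the maxread/maxwrite I/O sizes are used
 * (the previous behavior); the final fallback is MAXPHYS.
 */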
2864 __private_extern__
2865 void
2866 vfs_io_maxsegsize(vp, flags, maxsegsize)
2867 struct vnode *vp;
2868 int flags; /* B_READ or B_WRITE */
2869 int *maxsegsize;
2870 {
2871 struct mount *mp;
2872
2873 /* start with "reasonable" default */
2874 *maxsegsize = MAXPHYS;
2875
2876 mp = vp->v_mount;
2877 if (mp != NULL) {
2878 switch (flags) {
2879 case B_READ:
2880 if (mp->mnt_kern_flag & MNTK_IO_XINFO)
2881 *maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize;
2882 else
2883 /*
2884 * if the extended info doesn't exist
2885 * then use the maxread I/O size as the
2886 * max segment size... this is the previous behavior
2887 */
2888 *maxsegsize = mp->mnt_maxreadcnt;
2889 break;
2890 case B_WRITE:
2891 if (mp->mnt_kern_flag & MNTK_IO_XINFO)
2892 *maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize;
2893 else
2894 /*
2895 * if the extended info doesn't exist
2896 * then use the maxwrite I/O size as the
2897 * max segment size... this is the previous behavior
2898 */
2899 *maxsegsize = mp->mnt_maxwritecnt;
2900 break;
2901 default:
2902 break;
2903 }
2904 if (*maxsegsize == 0)
2905 *maxsegsize = MAXPHYS;
2906 }
2907 }
2908
2909
2910 #include <sys/disk.h>
2911
2912
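/*
 * Query the device backing a mount (via the DKIOC* ioctls from
 * <sys/disk.h>) for its transfer-size and segment limits and cache the
 * results in the mount structure and its x_constraints extension.
 * Also marks the mount with MNTK_ROOTDEV when it lives on the same
 * unit as the root file system and MNTK_VIRTUALDEV for virtual devices.
 */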
2913 int
2914 vfs_init_io_attributes(devvp, mp)
2915 struct vnode *devvp;
2916 struct mount *mp;
2917 {
2918 int error;
2919 off_t readblockcnt;
2920 off_t writeblockcnt;
2921 off_t readmaxcnt;
2922 off_t writemaxcnt;
2923 off_t readsegcnt;
2924 off_t writesegcnt;
2925 off_t readsegsize;
2926 off_t writesegsize;
2927 u_long blksize;
2928
2929 u_int64_t temp;
2930
2931 struct proc *p = current_proc();
2932 struct ucred *cred = p->p_ucred;
2933
2934 int isvirtual = 0;
2935 /*
2936 * determine if this mount point exists on the same device as the root
2937 * partition... if so, then it comes under the hard throttle control
2938 */
2939 int thisunit = -1;
2940 static int rootunit = -1;
2941 extern struct vnode *rootvp;
2942
2943 if (rootunit == -1) {
2944 if (VOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, cred, p))
2945 rootunit = -1;
2946 else if (rootvp == devvp)
2947 mp->mnt_kern_flag |= MNTK_ROOTDEV;
2948 }
2949 if (devvp != rootvp && rootunit != -1) {
2950 if (VOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, cred, p) == 0) {
2951 if (thisunit == rootunit)
2952 mp->mnt_kern_flag |= MNTK_ROOTDEV;
2953 }
2954 }
2955 if (VOP_IOCTL(devvp, DKIOCGETISVIRTUAL, (caddr_t)&isvirtual, 0, cred, p) == 0) {
2956 if (isvirtual)
2957 mp->mnt_kern_flag |= MNTK_VIRTUALDEV;
2958 }
2959
2960 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
2961 (caddr_t)&readblockcnt, 0, cred, p)))
2962 return (error);
2963
2964 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
2965 (caddr_t)&writeblockcnt, 0, cred, p)))
2966 return (error);
2967
2968 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD,
2969 (caddr_t)&readmaxcnt, 0, cred, p)))
2970 return (error);
2971
2972 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE,
2973 (caddr_t)&writemaxcnt, 0, cred, p)))
2974 return (error);
2975
2976 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
2977 (caddr_t)&readsegcnt, 0, cred, p)))
2978 return (error);
2979
2980 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
2981 (caddr_t)&writesegcnt, 0, cred, p)))
2982 return (error);
2983
2984 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD,
2985 (caddr_t)&readsegsize, 0, cred, p)))
2986 return (error);
2987
2988 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE,
2989 (caddr_t)&writesegsize, 0, cred, p)))
2990 return (error);
2991
2992 if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
2993 (caddr_t)&blksize, 0, cred, p)))
2994 return (error);
2995
2996
2997 if ( !(mp->mnt_kern_flag & MNTK_IO_XINFO)) {
2998 MALLOC(mp->mnt_xinfo_ptr, void *, sizeof(struct x_constraints), M_TEMP, M_WAITOK);
2999 mp->mnt_kern_flag |= MNTK_IO_XINFO;
3000 }
3001
3002 if (readmaxcnt)
3003 temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt;
3004 else {
3005 if (readblockcnt) {
3006 temp = readblockcnt * blksize;
3007 temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
3008 } else
3009 temp = MAXPHYS;
3010 }
3011 ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt = (u_int32_t)temp;
3012
3013 if (writemaxcnt)
3014 temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt;
3015 else {
3016 if (writeblockcnt) {
3017 temp = writeblockcnt * blksize;
3018 temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
3019 } else
3020 temp = MAXPHYS;
3021 }
3022 mp->mnt_maxwritecnt = (u_int32_t)temp;
3023
3024 if (readsegcnt) {
3025 temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
3026 mp->mnt_segreadcnt = (u_int16_t)temp;
3027 }
3028 if (writesegcnt) {
3029 temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
3030 mp->mnt_segwritecnt = (u_int16_t)temp;
3031 }
3032 if (readsegsize)
3033 temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize;
3034 else
3035 temp = mp->mnt_maxreadcnt;
3036 ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize = (u_int32_t)temp;
3037
3038 if (writesegsize)
3039 temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize;
3040 else
3041 temp = mp->mnt_maxwritecnt;
3042 ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize = (u_int32_t)temp;
3043
3044 return (error);
3045 }
3046
3047 static struct klist fs_klist;
3048
3049 void
3050 vfs_event_init(void)
3051 {
3052
3053 klist_init(&fs_klist);
3054 }
3055
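/*
 * Post a file system event to any kqueue listeners.  The fsid and data
 * arguments are currently unused; only the event bits are delivered to
 * the knotes on fs_klist.
 */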
3056 void
3057 vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data)
3058 {
3059
3060 KNOTE(&fs_klist, event);
3061 }
3062
3063 /*
3064 * return the number of mounted filesystems.
3065 */
3066 static int
3067 sysctl_vfs_getvfscnt(void)
3068 {
3069 struct mount *mp;
3070 int ret = 0;
3071
3072 simple_lock(&mountlist_slock);
3073 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
3074 ret++;
3075 simple_unlock(&mountlist_slock);
3076 return (ret);
3077 }
3078
3079 /*
3080 * Fill in the array of fsid_t's up to a max of 'count'; the actual
3081 * number filled in will be set in '*actual'.  If there are more fsid_t's
3082 * than will fit in fsidlst then ENOMEM will be returned and '*actual'
3083 * will have the actual count.
3084 * Having '*actual' filled out even in the error case is depended upon.
3085 */
3086 static int
3087 sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual)
3088 {
3089 struct mount *mp;
3090
3091 *actual = 0;
3092 simple_lock(&mountlist_slock);
3093 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
3094 (*actual)++;
3095 if (*actual <= count)
3096 fsidlst[(*actual) - 1] = mp->mnt_stat.f_fsid;
3097 }
3098 simple_unlock(&mountlist_slock);
3099 return (*actual <= count ? 0 : ENOMEM);
3100 }
3101
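/*
 * sysctl handler for vfs.generic.vfsidlist: copy out the fsid_t of
 * every mounted file system, retrying if a mount is added while the
 * output buffer is being allocated.
 */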
3102 static int
3103 sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS
3104 {
3105 int actual, error;
3106 size_t space;
3107 fsid_t *fsidlst;
3108
3109 /* This is a readonly node. */
3110 if (req->newptr != NULL)
3111 return (EPERM);
3112
3113 /* they are querying us so just return the space required. */
3114 if (req->oldptr == NULL) {
3115 req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
3116 return 0;
3117 }
3118 again:
3119 /*
3120 * Retrieve an accurate count of the amount of space required to copy
3121 * out all the fsids in the system.
3122 */
3123 space = req->oldlen;
3124 req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
3125
3126 /* they didn't give us enough space. */
3127 if (space < req->oldlen)
3128 return (ENOMEM);
3129
3130 MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK);
3131 error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t),
3132 &actual);
3133 /*
3134 * If we get back ENOMEM, then another mount has been added while we
3135 * slept in malloc above. If this is the case then try again.
3136 */
3137 if (error == ENOMEM) {
3138 FREE(fsidlst, M_TEMP);
3139 req->oldlen = space;
3140 goto again;
3141 }
3142 if (error == 0) {
3143 error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t));
3144 }
3145 FREE(fsidlst, M_TEMP);
3146 return (error);
3147 }
3148
3149 /*
3150 * Do a sysctl by fsid.
3151 */
3152 static int
3153 sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS
3154 {
3155 struct vfsidctl vc;
3156 struct mount *mp;
3157 struct statfs *sp;
3158 struct proc *p;
3159 int *name;
3160 int error, flags, namelen;
3161
3162 name = arg1;
3163 namelen = arg2;
3164 p = req->p;
3165
3166 error = SYSCTL_IN(req, &vc, sizeof(vc));
3167 if (error)
3168 return (error);
3169 if (vc.vc_vers != VFS_CTL_VERS1)
3170 return (EINVAL);
3171 mp = vfs_getvfs(&vc.vc_fsid);
3172 if (mp == NULL)
3173 return (ENOENT);
3174 /* reset so that the fs specific code can fetch it. */
3175 req->newidx = 0;
3176 /*
3177 * Note if this is a VFS_CTL then we pass the actual sysctl req
3178 * in for "oldp" so that the lower layer can DTRT and use the
3179 * SYSCTL_IN/OUT routines.
3180 */
3181 if (mp->mnt_op->vfs_sysctl != NULL) {
3182 error = mp->mnt_op->vfs_sysctl(name, namelen,
3183 req, NULL, NULL, 0, req->p);
3184 if (error != EOPNOTSUPP)
3185 return (error);
3186 }
3187 switch (name[0]) {
3188 case VFS_CTL_UMOUNT:
3189 VCTLTOREQ(&vc, req);
3190 error = SYSCTL_IN(req, &flags, sizeof(flags));
3191 if (error)
3192 break;
3193 error = safedounmount(mp, flags, p);
3194 break;
3195 case VFS_CTL_STATFS:
3196 VCTLTOREQ(&vc, req);
3197 error = SYSCTL_IN(req, &flags, sizeof(flags));
3198 if (error)
3199 break;
3200 sp = &mp->mnt_stat;
3201 if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) &&
3202 (error = VFS_STATFS(mp, sp, p)))
3203 return (error);
3204 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3205 error = SYSCTL_OUT(req, sp, sizeof(*sp));
3206 break;
3207 default:
3208 return (EOPNOTSUPP);
3209 }
3210 return (error);
3211 }
3212
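/*
 * kqueue filter operations for file system events (EVFILT_FS): knotes
 * attach to the global fs_klist and accumulate the event bits posted
 * by vfs_event_signal().
 */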
3213 static int filt_fsattach(struct knote *kn);
3214 static void filt_fsdetach(struct knote *kn);
3215 static int filt_fsevent(struct knote *kn, long hint);
3216
3217 struct filterops fs_filtops =
3218 { 0, filt_fsattach, filt_fsdetach, filt_fsevent };
3219
3220 static int
3221 filt_fsattach(struct knote *kn)
3222 {
3223
3224 kn->kn_flags |= EV_CLEAR;
3225 KNOTE_ATTACH(&fs_klist, kn);
3226 return (0);
3227 }
3228
3229 static void
3230 filt_fsdetach(struct knote *kn)
3231 {
3232
3233 KNOTE_DETACH(&fs_klist, kn);
3234 }
3235
3236 static int
3237 filt_fsevent(struct knote *kn, long hint)
3238 {
3239
3240 kn->kn_fflags |= hint;
3241 return (kn->kn_fflags != 0);
3242 }
3243
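/*
 * sysctl handler for vfs.generic.noremotehang: get or set the
 * P_NOREMOTEHANG flag on the process identified by the supplied pid.
 * A negative pid clears the flag, a positive pid sets it; changing a
 * process other than the caller requires superuser privilege.
 */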
3244 static int
3245 sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS
3246 {
3247 int out, error;
3248 pid_t pid;
3249 size_t space;
3250 struct proc *p;
3251
3252 /* We need a pid. */
3253 if (req->newptr == NULL)
3254 return (EINVAL);
3255
3256 error = SYSCTL_IN(req, &pid, sizeof(pid));
3257 if (error)
3258 return (error);
3259
3260 p = pfind(pid < 0 ? -pid : pid);
3261 if (p == NULL)
3262 return (ESRCH);
3263
3264 /*
3265 * Fetching the value is ok, but we only fetch if the old
3266 * pointer is given.
3267 */
3268 if (req->oldptr != NULL) {
3269 out = !((p->p_flag & P_NOREMOTEHANG) == 0);
3270 error = SYSCTL_OUT(req, &out, sizeof(out));
3271 return (error);
3272 }
3273
3274 /* the superuser check offers us enough security. */
3275 if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0)
3276 return (EPERM);
3277
3278 if (pid < 0)
3279 p->p_flag &= ~P_NOREMOTEHANG;
3280 else
3281 p->p_flag |= P_NOREMOTEHANG;
3282
3283 return (0);
3284 }
3285 /* the vfs.generic. branch. */
3286 SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge");
3287 /* retrieve a list of mounted filesystem fsid_t's */
3288 SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD,
3289 0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids");
3290 /* perform operations on filesystem via fsid_t */
3291 SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW,
3292 sysctl_vfs_ctlbyfsid, "ctlbyfsid");
3293 SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW,
3294 0, 0, sysctl_vfs_noremotehang, "I", "noremotehang");
3295