/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 */
/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/filedesc.h>
#include <sys/event.h>

#include <machine/spl.h>

#include <kern/assert.h>

#include <miscfs/specfs/specdev.h>

#include <mach/mach_types.h>
#include <mach/memory_object_types.h>
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
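
/*
 * Illustrative sketch (not part of the original file): the two tables above
 * translate between the S_IFMT bits of a file mode and the vnode type enum.
 * On BSD-derived systems the IFTOVT()/VTTOIF() macros in <sys/vnode.h>
 * typically index these tables; the guarded example below shows the idea
 * with hypothetical local helpers.
 */
#if 0	/* example only */
static enum vtype
example_mode_to_vtype(mode_t mode)
{
	/* the upper four bits of the S_IFMT field select the vnode type */
	return (iftovt_tab[(mode & S_IFMT) >> 12]);
}

static int
example_vtype_to_mode(enum vtype type)
{
	/* inverse mapping: vnode type back to S_IF* mode bits */
	return (vttoif_tab[(int)type]);
}
#endif	/* example only */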
static void vfree(struct vnode *vp);
static void vinactive(struct vnode *vp);
static int vnreclaim(int count);
extern kern_return_t
	adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
TAILQ_HEAD(freelst, vnode) vnode_free_list;		/* vnode free list */
TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list;	/* vnode inactive list */
struct mntlist mountlist;				/* mounted filesystem list */
#if DIAGNOSTIC
#define VLISTCHECK(fun, vp, list)	\
	if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
		panic("%s: %s vnode not on %slist", (fun), (list), (list));

#define VINACTIVECHECK(fun, vp, expected)	\
	do {	\
		int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE);	\
		if (__is_inactive ^ expected)	\
			panic("%s: %sinactive vnode, expected %s", (fun),	\
				__is_inactive? "" : "not ",	\
				expected? "inactive": "not inactive");	\
	} while(0)
#else
#define VLISTCHECK(fun, vp, list)
#define VINACTIVECHECK(fun, vp, expected)
#endif /* DIAGNOSTIC */
#define VLISTNONE(vp)	\
	do {	\
		(vp)->v_freelist.tqe_next = (struct vnode *)0;	\
		(vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb;	\
	} while(0)

#define VONLIST(vp)	\
	((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
/* remove a vnode from free vnode list */
#define VREMFREE(fun, vp)	\
	do {	\
		VLISTCHECK((fun), (vp), "free");	\
		TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist);	\
		VLISTNONE((vp));	\
		freevnodes--;	\
	} while(0)

/* remove a vnode from inactive vnode list */
#define VREMINACTIVE(fun, vp)	\
	do {	\
		VLISTCHECK((fun), (vp), "inactive"); \
		VINACTIVECHECK((fun), (vp), VUINACTIVE); \
		TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \
		CLR((vp)->v_flag, VUINACTIVE); \
		VLISTNONE((vp));	\
		inactivevnodes--;	\
	} while(0)
#define VORECLAIM_ENABLE(vp)	\
	do {	\
		if (ISSET((vp)->v_flag, VORECLAIM))	\
			panic("vm_object_reclaim already");	\
		SET((vp)->v_flag, VORECLAIM);	\
	} while(0)

#define VORECLAIM_DISABLE(vp)	\
	do {	\
		CLR((vp)->v_flag, VORECLAIM);	\
		if (ISSET((vp)->v_flag, VXWANT)) {	\
			CLR((vp)->v_flag, VXWANT);	\
			wakeup((caddr_t)(vp));	\
		}	\
	} while(0)
/*
 * Have to declare first two locks as actual data even if !MACH_SLOCKS, since
 * pointers to them get passed around.
 */
simple_lock_data_t mountlist_slock;
simple_lock_data_t mntvnode_slock;
decl_simple_lock_data(,mntid_slock);
decl_simple_lock_data(,vnode_free_list_slock);
decl_simple_lock_data(,spechash_slock);
/*
 * vnodetarget is the amount of vnodes we expect to get back
 * from the inactive vnode list and VM object cache.
 * As vnreclaim() is a mainly CPU-bound operation, for faster
 * processors this number could be higher.
 * Having this number too high introduces longer delays in
 * the execution of getnewvnode().
 */
unsigned long vnodetarget;		/* target for vnreclaim() */
#define VNODE_FREE_TARGET	20	/* Default value for vnodetarget */

/*
 * We need quite a few vnodes on the free list to sustain the
 * rapid stat() the compilation process does, and still benefit from the name
 * cache. Having too few vnodes on the free list causes serious disk
 * thrashing as we cycle through them.
 */
#define VNODE_FREE_MIN		300	/* freelist should have at least these many */

/*
 * We need to get vnodes back from the VM object cache when a certain #
 * of vnodes are reused from the freelist. This is essential for the
 * caching to be effective in the namecache and the buffer cache [for the
 * metadata].
 */
#define	VNODE_TOOMANY_REUSED	(VNODE_FREE_MIN/4)

/*
 * If we have enough vnodes on the freelist we do not want to reclaim
 * the vnodes from the VM object cache.
 */
#define VNODE_FREE_ENOUGH	(VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
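
/*
 * Illustrative sketch (not part of the original file): how the tunables above
 * interact.  The helper name is hypothetical; the real policy lives in
 * getnewvnode() and vnreclaim() below.
 */
#if 0	/* example only */
static int
example_should_reclaim(long nvnodes, long nfree, int reused)
{
	/* low on free vnodes: try to pull some back from the inactive list */
	if (nfree < VNODE_FREE_MIN)
		return (1);
	/* heavy reuse from the freelist and still not comfortably full */
	if (nvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED &&
	    nfree < VNODE_FREE_ENOUGH)
		return (1);
	return (0);
}
#endif	/* example only */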
/*
 * Initialize the vnode management data structures.
 */
__private_extern__ void
vntblinit()
{
	extern struct lock__bsd__	exchangelock;

	simple_lock_init(&mountlist_slock);
	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	TAILQ_INIT(&vnode_inactive_list);
	CIRCLEQ_INIT(&mountlist);
	lockinit(&exchangelock, PVFS, "exchange", 0, 0);

	vnodetarget = VNODE_FREE_TARGET;

	/*
	 * Scale the vm_object_cache to accommodate the vnodes
	 * we want to cache.
	 */
	(void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
}
/* Reset the VM Object Cache with the values passed in */
__private_extern__ kern_return_t
reset_vmobjectcache(unsigned int val1, unsigned int val2)
{
	vm_size_t oval = val1 - VNODE_FREE_MIN;
	vm_size_t nval;

	if (val2 < VNODE_FREE_MIN)
		nval = 0;
	else
		nval = val2 - VNODE_FREE_MIN;

	return(adjust_vm_object_cache(oval, nval));
}
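
/*
 * Illustrative sketch (not part of the original file): reset_vmobjectcache()
 * rescales the VM object cache when the vnode limit changes.  A hypothetical
 * caller that grows the global "desiredvnodes" might look like this.
 */
#if 0	/* example only */
static int
example_grow_vnode_limit(unsigned int new_limit)
{
	unsigned int old_limit = desiredvnodes;

	if (new_limit < VNODE_FREE_MIN)
		return (EINVAL);
	/* let the VM object cache track the new headroom */
	if (reset_vmobjectcache(old_limit, new_limit) != KERN_SUCCESS)
		return (EIO);
	desiredvnodes = new_limit;
	return (0);
}
#endif	/* example only */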
/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct slock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_kern_flag |= MNTK_MWAIT;
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		sleep((caddr_t)mp, PVFS);
		if (interlkp)
			simple_lock(interlkp);
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}
/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{
	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = current_proc();	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));

	/* Initialize the default IO constraints */
	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;

	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}
/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	if (mountroot != NULL) {
		error = (*mountroot)();
		return (error);
	}

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}
/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}
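
/*
 * Illustrative sketch (not part of the original file): vfs_getvfs() is the
 * fsid -> mount lookup used when a caller is handed a statfs-style fsid.
 * The helper below is hypothetical and only shows the calling convention.
 */
#if 0	/* example only */
static int
example_fsid_to_flags(fsid_t *fsid, int *flagsp)
{
	struct mount *mp;

	if ((mp = vfs_getvfs(fsid)) == NULL)
		return (ESTALE);	/* no mounted filesystem has that fsid */
	*flagsp = mp->mnt_flag;
	return (0);
}
#endif	/* example only */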
/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}
/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
}
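
/*
 * Illustrative sketch (not part of the original file): callers that build a
 * struct vattr for VOP_SETATTR() first reset every field to VNOVAL with
 * vattr_null() and then fill in only the attributes they mean to change.
 * The helper below is hypothetical.
 */
#if 0	/* example only */
static int
example_set_mode(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p)
{
	struct vattr va;

	vattr_null(&va);	/* every field starts out as VNOVAL */
	va.va_mode = mode;	/* only the mode is marked "to be set" */
	return (VOP_SETATTR(vp, &va, cred, p));
}
#endif	/* example only */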
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
static void vclean __P((struct vnode *vp, int flag, struct proc *p));
extern void vgonel __P((struct vnode *vp, struct proc *p));
long numvnodes, freevnodes;
long inactivevnodes;
long vnode_reclaim_tried;
long vnode_objects_reclaimed;

extern struct vattr va_null;
/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)(void *);
	struct vnode **vpp;
{
	struct proc *p = current_proc();	/* XXX */
	struct vnode *vp;
	int cnt, didretry = 0;
	static int reused = 0;			/* track the reuse rate */
	int reclaimhits = 0;

retry:
	simple_lock(&vnode_free_list_slock);
	/*
	 * MALLOC a vnode if the number of vnodes has not reached the desired
	 * value and the number on the free list is still reasonable...
	 * reuse from the freelist even though we may evict a name cache entry
	 * to reduce the number of vnodes that accumulate.... vnodes tie up
	 * wired memory and are never garbage collected
	 */
	if (numvnodes < desiredvnodes && (freevnodes < (2 * VNODE_FREE_MIN))) {
		numvnodes++;
		simple_unlock(&vnode_free_list_slock);
		MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		VLISTNONE(vp);		/* avoid double queue removal */
		simple_lock_init(&vp->v_interlock);
		goto done;
	}

	/*
	 * Once the desired number of vnodes are allocated,
	 * we start reusing the vnodes.
	 */
	if (freevnodes < VNODE_FREE_MIN) {
		/*
		 * if we are low on vnodes on the freelist attempt to get
		 * some back from the inactive list and VM object cache
		 */
		simple_unlock(&vnode_free_list_slock);
		(void)vnreclaim(vnodetarget);
		simple_lock(&vnode_free_list_slock);
	}
	if (numvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED) {
		reused = 0;
		if (freevnodes < VNODE_FREE_ENOUGH) {
			simple_unlock(&vnode_free_list_slock);
			(void)vnreclaim(vnodetarget);
			simple_lock(&vnode_free_list_slock);
		}
	}

	for (cnt = 0, vp = vnode_free_list.tqh_first;
	    vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
		if (simple_lock_try(&vp->v_interlock)) {
			/* got the interlock */
			if (ISSET(vp->v_flag, VORECLAIM)) {
				/* skip over the vnodes that are being reclaimed */
				simple_unlock(&vp->v_interlock);
				reclaimhits++;
			} else
				break;
		}
	}

	/*
	 * Unless this is a bad time of the month, at most
	 * the first NCPUS items on the free list are
	 * locked, so this is close enough to being empty.
	 */
	if (vp == NULLVP) {
		simple_unlock(&vnode_free_list_slock);
		if (!(didretry++) && (vnreclaim(vnodetarget) > 0))
			goto retry;
		log(LOG_EMERG, "%d vnodes locked, %d desired, %d numvnodes, "
			"%d free, %d inactive, %d being reclaimed\n",
			cnt, desiredvnodes, numvnodes, freevnodes, inactivevnodes,
			reclaimhits);
		return (ENFILE);
	}

	if (vp->v_usecount)
		panic("free vnode isn't: v_type = %d, v_usecount = %d?",
			vp->v_type, vp->v_usecount);

	VREMFREE("getnewvnode", vp);
	reused++;
	simple_unlock(&vnode_free_list_slock);

	if (vp->v_type != VBAD)
		vgonel(vp, p);		/* clean and reclaim the vnode */
	else
		simple_unlock(&vp->v_interlock);
#if DIAGNOSTIC
	if (vp->v_data)
		panic("cleaned vnode isn't");
	if (vp->v_numoutput)
		panic("Clean vnode has pending I/O's");
#endif /* DIAGNOSTIC */
	if (UBCINFOEXISTS(vp))
		panic("getnewvnode: ubcinfo not cleaned");

	if (vp->v_flag & VHASDIRTY)
		cluster_release(vp);

	// make sure all these fields are cleared out as the
	// name/parent stuff uses them and assumes they're
	// cleared to null/0.
	if (vp->v_scmap != NULL) {
		panic("getnewvnode: vp @ 0x%x has non-null scmap.\n", vp);
	}
	vp->v_un.vu_name = NULL;
	vp->v_un1.v_cl.v_pad = 0;

	/* we may have blocked, re-evaluate state */
	simple_lock(&vnode_free_list_slock);
	if (VONLIST(vp)) {
		if (vp->v_usecount == 0)
			VREMFREE("getnewvnode", vp);
		else if (ISSET((vp)->v_flag, VUINACTIVE))
			VREMINACTIVE("getnewvnode", vp);
	}
	simple_unlock(&vnode_free_list_slock);

done:
	vp->v_flag = VSTANDARD;
	vp->v_op = vops;
	vp->v_tag = tag;
	insmntque(vp, mp);
	vp->v_usecount = 1;
	*vpp = vp;
	return (0);
}
/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}
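
/*
 * Illustrative sketch (not part of the original file): insmntque() is how a
 * vnode changes which per-mount vnode list it lives on.  Passing a NULL mount
 * simply removes it from its current list, which is what vclean()/vgonel()
 * do when a vnode is being disassociated from its filesystem.
 */
#if 0	/* example only */
static void
example_orphan_vnode(struct vnode *vp)
{
	/* take vp off its old mount's list without putting it on a new one */
	insmntque(vp, (struct mount *)0);
}
#endif	/* example only */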
void
vpwakeup(struct vnode *vp)
{
	if (vp) {
		if (--vp->v_numoutput < 0)
			panic("vpwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT || vp->v_flag & VTHROTTLED)
		    && vp->v_numoutput <= 0) {
			vp->v_flag &= ~(VBWAIT|VTHROTTLED);
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}
/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	CLR(bp->b_flags, B_WRITEINPROG);
	vpwakeup(bp->b_vp);
}
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int error = 0;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
			return (error);
		}
		if (vp->v_dirtyblkhd.lh_first)
			panic("vinvalbuf: dirty bufs (vp 0x%x, bp 0x%x)",
				vp, vp->v_dirtyblkhd.lh_first);
	}

	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
				continue;
			if (ISSET(bp->b_flags, B_BUSY)) {
				SET(bp->b_flags, B_WANTED);
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				if (error)
					return (error);
				break;
			}
			SET(bp->b_flags, B_BUSY);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if (ISSET(bp->b_flags, B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}

			if (bp->b_flags & B_LOCKED) {
				panic("vinvalbuf: bp @ 0x%x is locked!", bp);
			}
			SET(bp->b_flags, B_INVAL);
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}
/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = current_proc();	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;
	struct specinfo *specinfop;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	MALLOC_ZONE(specinfop, struct specinfo *, sizeof(struct specinfo),
			M_SPECINFO, M_WAITOK);
	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		nvp->v_specinfo = specinfop;
		specinfop = 0;	/* buffer used */
		bzero(nvp->v_specinfo, sizeof(struct specinfo));
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		/* Since buffer is used just return */
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	if (specinfop)
		FREE_ZONE((void *)specinfop, sizeof(struct specinfo), M_SPECINFO);
	return (vp);
}
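
/*
 * Illustrative sketch (not part of the original file): a filesystem that has
 * just created a device vnode hands it to checkalias() to fold it together
 * with any pre-existing vnode for the same dev_t.  The helper below is
 * hypothetical; bdevvp() above is the in-tree caller to look at.
 */
#if 0	/* example only */
static struct vnode *
example_alias_device_vnode(struct vnode *vp, dev_t dev, struct mount *mp)
{
	struct vnode *nvp;

	if ((nvp = checkalias(vp, dev, mp)) != NULL) {
		/* an aliased vnode already existed: drop ours, use the alias */
		vput(vp);
		return (nvp);
	}
	return (vp);
}
#endif	/* example only */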
/*
 * Get a reference on a particular vnode and lock it if requested.
 * If the vnode was on the inactive list, remove it from the list.
 * If the vnode was on the free list, remove it from the list and
 * move it to inactive list as needed.
 * The vnode lock bit is set if the vnode is being eliminated in
 * vgone. The process is awakened when the transition is completed,
 * and an error returned to indicate that the vnode is no longer
 * usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error = 0;
	u_long vpid;

	vpid = vp->v_id;	// save off the original v_id

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		(void)tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	/*
	 * vnode is being terminated.
	 * wait for vnode_pager_no_senders() to clear VTERMINATE
	 */
	if (ISSET(vp->v_flag, VTERMINATE)) {
		SET(vp->v_flag, VTERMWANT);
		simple_unlock(&vp->v_interlock);
		(void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vget1", 0);
		return (ENOENT);
	}

	/*
	 * if the vnode is being initialized,
	 * wait for it to finish initialization
	 */
	if (ISSET(vp->v_flag, VUINIT)) {
		SET(vp->v_flag, VUWANT);
		simple_unlock(&vp->v_interlock);
		(void) tsleep((caddr_t)vp, PINOD, "vget2", 0);
		return (ENOENT);
	}

	simple_lock(&vnode_free_list_slock);
	if (VONLIST(vp)) {
		if (vp->v_usecount == 0)
			VREMFREE("vget", vp);
		else if (ISSET((vp)->v_flag, VUINACTIVE))
			VREMINACTIVE("vget", vp);
	}
	simple_unlock(&vnode_free_list_slock);

	if (++vp->v_usecount <= 0)
		panic("vget: v_usecount");

	/*
	 * Recover named reference as needed
	 */
	if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) {
		simple_unlock(&vp->v_interlock);
		if (ubc_getobject(vp, UBC_HOLDOBJECT) == MEMORY_OBJECT_CONTROL_NULL) {
			error = ENOENT;
			goto errout;
		}
		simple_lock(&vp->v_interlock);
	}

	if (flags & LK_TYPE_MASK) {
		if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
			goto errout;
		if (vpid != vp->v_id) {	// make sure it's still the same vnode
			VOP_UNLOCK(vp, 0, p);
			error = ENOENT;
			goto errout;
		}
		return (0);
	}

	if ((flags & LK_INTERLOCK) == 0)
		simple_unlock(&vp->v_interlock);

	if (vpid != vp->v_id) {	// make sure it's still the same vnode
		error = ENOENT;
		goto errout;
	}

	return (0);

errout:
	simple_lock(&vp->v_interlock);

	/*
	 * we may have blocked. Re-evaluate the state
	 */
	simple_lock(&vnode_free_list_slock);
	if (VONLIST(vp)) {
		if (vp->v_usecount == 0)
			VREMFREE("vget", vp);
		else if (ISSET((vp)->v_flag, VUINACTIVE))
			VREMINACTIVE("vget", vp);
	}
	simple_unlock(&vnode_free_list_slock);

	/*
	 * If the vnode was not active in the first place
	 * must not call vrele() as VOP_INACTIVE() is not
	 * required.
	 * So inlined part of vrele() here.
	 */
	if (--vp->v_usecount == 1) {
		if (UBCINFOEXISTS(vp)) {
			vinactive(vp);
			simple_unlock(&vp->v_interlock);
			return (error);
		}
	}
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return (error);
	}
	if (vp->v_usecount < 0)
		panic("vget: negative usecount (%d)", vp->v_usecount);
	vfree(vp);
	simple_unlock(&vp->v_interlock);
	return (error);
}
/*
 * Get a pager reference on the particular vnode.
 *
 * This is called from ubc_info_init() and it is assumed that
 * the vnode is not on the free list.
 * It is also assumed that the vnode is neither being recycled
 * by vgonel nor being terminated by vnode_pager_vrele().
 *
 * The vnode interlock is NOT held by the caller.
 */
__private_extern__ int
vnode_pager_vget(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);

	UBCINFOCHECK("vnode_pager_vget", vp);

	if (ISSET(vp->v_flag, (VXLOCK|VORECLAIM|VTERMINATE)))
		panic("%s: dying vnode", "vnode_pager_vget");

	simple_lock(&vnode_free_list_slock);
	/* The vnode should not be on free list */
	if (VONLIST(vp)) {
		if (vp->v_usecount == 0)
			panic("%s: still on list", "vnode_pager_vget");
		else if (ISSET((vp)->v_flag, VUINACTIVE))
			VREMINACTIVE("vnode_pager_vget", vp);
	}

	/* The vnode should not be on the inactive list here */
	simple_unlock(&vnode_free_list_slock);

	/* After all those checks, now do the real work :-) */
	if (++vp->v_usecount <= 0)
		panic("vnode_pager_vget: v_usecount");
	simple_unlock(&vp->v_interlock);

	return (0);
}
/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress. So, an active shared
 * count is maintained in an auxiliary vnode lock structure.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
#ifdef notyet
	/*
	 * This code cannot be used until all the non-locking filesystems
	 * (notably NFS) are converted to properly lock and release nodes.
	 * Also, certain vnode operations change the locking state within
	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
	 * and symlink). Ideally these operations should not change the
	 * lock state, but should be changed to let the caller of the
	 * function unlock them. Otherwise all intermediate vnode layers
	 * (such as union, umapfs, etc) must catch these functions to do
	 * the necessary locking at their layer. Note that the inactive
	 * and lookup operations also change their lock state, but this
	 * cannot be avoided, so these two operations will always need
	 * to be handled in intermediate layers.
	 */
	struct vnode *vp = ap->a_vp;
	int vnflags, flags = ap->a_flags;

	if (vp->v_vnlock == NULL) {
		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
			return (0);
		MALLOC(vp->v_vnlock, struct lock__bsd__ *,
				sizeof(struct lock__bsd__), M_TEMP, M_WAITOK);
		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	}
	switch (flags & LK_TYPE_MASK) {
	case LK_DRAIN:
		vnflags = LK_DRAIN;
		break;
	case LK_EXCLUSIVE:
	case LK_SHARED:
		vnflags = LK_SHARED;
		break;
	case LK_UPGRADE:
	case LK_EXCLUPGRADE:
	case LK_DOWNGRADE:
		return (0);
	case LK_RELEASE:
	default:
		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
	}
	if (flags & LK_INTERLOCK)
		vnflags |= LK_INTERLOCK;
	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
#else /* notyet */
	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&ap->a_vp->v_interlock);
	return (0);
#endif /* notyet */
}

/*
 * Decrement the active use count.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
}

/*
 * Return whether or not the node is in use.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockstatus(vp->v_vnlock));
}
/*
 * Vnode reference, just increment the count.
 */
void
vref(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");

	/* If on the inactive list, remove it from there */
	simple_lock(&vnode_free_list_slock);
	if (ISSET((vp)->v_flag, VUINACTIVE))
		VREMINACTIVE("vref", vp);
	simple_unlock(&vnode_free_list_slock);

	if (++vp->v_usecount <= 0)
		panic("vref v_usecount");
	simple_unlock(&vp->v_interlock);
}
static void
clean_up_name_parent_ptrs(struct vnode *vp)
{
	if (VNAME(vp) || VPARENT(vp)) {
		// do it this way so we don't block before clearing
	}
}
/*
 * put the vnode on appropriate free list.
 * called with v_interlock held.
 */
static void
vfree(vp)
	struct vnode *vp;
{
	funnel_t *curflock;
	extern int disable_funnel;

	if ((curflock = thread_funnel_get()) != kernel_flock &&
	    !(disable_funnel && curflock != THR_FUNNEL_NULL))
		panic("Entering vfree() without kernel funnel");

	/*
	 * if the vnode is not obtained by calling getnewvnode() we
	 * are not responsible for the cleanup. Just return.
	 */
	if (!(vp->v_flag & VSTANDARD)) {
		return;
	}

	if (vp->v_usecount != 0)
		panic("vfree: v_usecount");

	/* insert at tail of LRU list or at head if VAGE is set */
	simple_lock(&vnode_free_list_slock);

	// make sure the name & parent pointers get cleared out
	// clean_up_name_parent_ptrs(vp);

	if (VONLIST(vp))
		panic("%s: vnode still on list", "vfree");

	if (vp->v_flag & VAGE) {
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		vp->v_flag &= ~VAGE;
	} else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	freevnodes++;
	simple_unlock(&vnode_free_list_slock);
}
/*
 * put the vnode on the inactive list.
 * called with v_interlock held
 */
static void
vinactive(vp)
	struct vnode *vp;
{
	funnel_t *curflock;
	extern int disable_funnel;

	if ((curflock = thread_funnel_get()) != kernel_flock &&
	    !(disable_funnel && curflock != THR_FUNNEL_NULL))
		panic("Entering vinactive() without kernel funnel");

	if (!UBCINFOEXISTS(vp))
		panic("vinactive: not a UBC vnode");

	if (vp->v_usecount != 1)
		panic("vinactive: v_usecount");

	simple_lock(&vnode_free_list_slock);

	if (VONLIST(vp))
		panic("%s: vnode still on list", "vinactive");
	VINACTIVECHECK("vinactive", vp, 0);

	TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
	SET(vp->v_flag, VUINACTIVE);
	CLR(vp->v_flag, (VNOCACHE_DATA | VRAOFF));

	inactivevnodes++;
	simple_unlock(&vnode_free_list_slock);
}
/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = current_proc();	/* XXX */

	simple_lock(&vp->v_interlock);
	if (--vp->v_usecount == 1) {
		if (UBCINFOEXISTS(vp)) {
			vinactive(vp);
			simple_unlock(&vp->v_interlock);
			VOP_UNLOCK(vp, 0, p);
			return;
		}
	}
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}

	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: v_usecount = %d, v_writecount = %d",
			vp->v_usecount, vp->v_writecount);
	}

	simple_lock(&vnode_free_list_slock);
	if (ISSET((vp)->v_flag, VUINACTIVE))
		VREMINACTIVE("vref", vp);
	simple_unlock(&vnode_free_list_slock);

	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
	/*
	 * The interlock is not held and
	 * VOP_INACTIVE releases the vnode lock.
	 * We could block and the vnode might get reactivated.
	 * Can not just call vfree without checking the state.
	 */
	simple_lock(&vp->v_interlock);
	if (!VONLIST(vp)) {
		if (vp->v_usecount == 0)
			vfree(vp);
		else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
			vinactive(vp);
	}
	simple_unlock(&vp->v_interlock);
}
/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = current_proc();	/* XXX */
	funnel_t *curflock;
	extern int disable_funnel;

	if ((curflock = thread_funnel_get()) != kernel_flock &&
	    !(disable_funnel && curflock != THR_FUNNEL_NULL))
		panic("Entering vrele() without kernel funnel");

	simple_lock(&vp->v_interlock);
	if (--vp->v_usecount == 1) {
		if (UBCINFOEXISTS(vp)) {
			if ((vp->v_flag & VXLOCK) == 0)
				vinactive(vp);
			simple_unlock(&vp->v_interlock);
			return;
		}
	}
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}

	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}

	if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
		/* vnode is being cleaned, just return */
		vfree(vp);
		simple_unlock(&vp->v_interlock);
		return;
	}

	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
		VOP_INACTIVE(vp, p);
		/*
		 * vn_lock releases the interlock and
		 * VOP_INACTIVE releases the vnode lock.
		 * We could block and the vnode might get reactivated.
		 * Can not just call vfree without checking the state.
		 */
		simple_lock(&vp->v_interlock);
		if (!VONLIST(vp)) {
			if (vp->v_usecount == 0)
				vfree(vp);
			else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
				vinactive(vp);
		}
		simple_unlock(&vp->v_interlock);
	} else {
		simple_unlock(&vp->v_interlock);
		kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp);
	}
}
void
vagevp(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	vp->v_flag |= VAGE;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}
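
/*
 * Illustrative sketch (not part of the original file): vhold()/holdrele() are
 * a paired hold count used by page and buffer structures that point at a
 * vnode without taking a full use reference.  Every vhold() must eventually
 * be balanced by a holdrele(), as in the hypothetical helper below.
 */
#if 0	/* example only */
static void
example_briefly_hold(struct vnode *vp)
{
	vhold(vp);		/* pin the vnode structure */
	/* ... inspect vp while it cannot be freed out from under us ... */
	holdrele(vp);		/* drop the hold; panics if over-released */
}
#endif	/* example only */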
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = current_proc();
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM or VNOFLUSH.
		 */
		if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * Skip over vnodes marked VSWAP.
		 */
		if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
		if (busyprt)
			vprint("vflush: busy vnode", vp);
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy && ((flags & FORCECLOSE)==0))
		return (EBUSY);
	return (0);
}
/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;
	int didhold;

	/*
	 * if the vnode is not obtained by calling getnewvnode() we
	 * are not responsible for the cleanup. Just return.
	 */
	if (!(vp->v_flag & VSTANDARD)) {
		simple_unlock(&vp->v_interlock);
		return;
	}

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount) {
		/*
		 * active vnode can not be on the free list.
		 * we are about to take an extra reference on this vnode
		 * do the queue management as needed
		 * Not doing so can cause "still on list" or
		 * "vnreclaim: v_usecount" panic if VOP_LOCK() blocks.
		 */
		simple_lock(&vnode_free_list_slock);
		if (ISSET((vp)->v_flag, VUINACTIVE))
			VREMINACTIVE("vclean", vp);
		simple_unlock(&vnode_free_list_slock);

		if (++vp->v_usecount <= 0)
			panic("vclean: v_usecount");
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);

	/*
	 * While blocked in VOP_LOCK() someone could have dropped
	 * reference[s] and we could land on the inactive list.
	 * if this vnode is on the inactive list
	 * take it off the list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (ISSET((vp)->v_flag, VUINACTIVE))
		VREMINACTIVE("vclean", vp);
	simple_unlock(&vnode_free_list_slock);

	/* Clean the pages in VM. */
	if (active && (flags & DOCLOSE))
		VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);

	/* Clean the pages in VM. */
	didhold = ubc_hold(vp);
	if ((active) && (didhold))
		(void)ubc_clean(vp, 0); /* do not invalidate */

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE) {
		if (vp->v_tag == VT_NFS)
			nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
		else
			vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	}

	if (active)
		VOP_INACTIVE(vp, p);
	else
		VOP_UNLOCK(vp, 0, p);

	/* Destroy ubc named reference */
	if (didhold) {
		ubc_rele(vp);
		ubc_destroy_named(vp);
	}

	/*
	 * Make sure vp isn't on the inactive list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (ISSET((vp)->v_flag, VUINACTIVE)) {
		VREMINACTIVE("vclean", vp);
	}
	simple_unlock(&vnode_free_list_slock);

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	// make sure the name & parent ptrs get cleaned out!
	clean_up_name_parent_ptrs(vp);

	if (vp->v_vnlock) {
		struct lock__bsd__ *tmp = vp->v_vnlock;
		if ((tmp->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		vp->v_vnlock = NULL;
	}

	/* It's dead, Jim! */
	vp->v_op = dead_vnodeop_p;

	insmntque(vp, (struct mount *)0);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}

	if (active)
		vrele(vp);
}
/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = current_proc();

	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			while (vp->v_flag & VXLOCK) {
				vp->v_flag |= VXWANT;
				simple_unlock(&vp->v_interlock);
				(void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			}
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
	return (0);
}
/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct slock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = current_proc();

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * if the vnode is not obtained by calling getnewvnode() we
	 * are not responsible for the cleanup. Just return.
	 */
	if (!(vp->v_flag & VSTANDARD)) {
		simple_unlock(&vp->v_interlock);
		return;
	}

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		while (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			(void)tsleep((caddr_t)vp, PINOD, "vgone", 0);
		}
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		{
		struct specinfo *tmp = vp->v_specinfo;
		vp->v_specinfo = NULL;
		FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO);
		}
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 && (vp->v_flag & VUINACTIVE) == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}
/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}
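
/*
 * Illustrative sketch (not part of the original file): vfinddev() answers
 * "is there already a vnode for this device number and type?".  The helper
 * below is hypothetical and only demonstrates the out-parameter convention.
 */
#if 0	/* example only */
static int
example_device_is_known(dev_t dev)
{
	struct vnode *vp;

	/* returns non-zero and fills in vp when a matching vnode exists */
	return (vfinddev(dev, VBLK, &vp));
}
#endif	/* example only */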
/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
int prtactive = 0;	/* 1 => print out reclaim of active vnodes */

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
	{ "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VNOFLUSH)
		strcat(buf, "|VNOFLUSH");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = current_proc();
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
__private_extern__ int
build_path(struct vnode *vp, char *buff, int buflen, int *outlen)
{
	char *end, *str;
	int i, len, ret = 0, counter = 0;

	end = &buff[buflen-1];
	*end = '\0';

	while (vp && VPARENT(vp) != vp) {
		// the maximum depth of a file system hierarchy is MAXPATHLEN/2
		// (with single-char names separated by slashes).  we panic if
		// we've ever looped more than that.
		if (counter++ > MAXPATHLEN/2) {
			panic("build_path: vnode parent chain is too long! vp 0x%x\n", vp);
		}

		if (VNAME(vp) == NULL) {
			if (VPARENT(vp) != NULL) {
				ret = EINVAL;
			}
			break;
		}

		// count how long the string is
		for (len = 0, str = VNAME(vp); *str; str++, len++)
			;

		// check that there's enough space
		if ((end - buff) < len) {
			ret = ENOSPC;
			break;
		}

		// copy it backwards
		for (; len > 0; len--) {
			*--end = *--str;
		}

		// put in the path separator
		*--end = '/';

		// walk up the chain.
		vp = VPARENT(vp);

		// check if we're crossing a mount point and
		// switch the vp if we are.
		if (vp && (vp->v_flag & VROOT)) {
			vp = vp->v_mount->mnt_vnodecovered;
		}
	}

	// slide it down to the beginning of the buffer
	memmove(buff, end, &buff[buflen] - end);

	*outlen = &buff[buflen] - end;

	return ret;
}

__private_extern__ int
vn_getpath(struct vnode *vp, char *pathbuf, int *len)
{
	return build_path(vp, pathbuf, *len, len);
}
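
/*
 * Illustrative sketch (not part of the original file): vn_getpath() wraps
 * build_path() and takes a buffer plus an in/out length.  The helper below
 * is hypothetical and assumes MAXPATHLEN-sized storage.
 */
#if 0	/* example only */
static void
example_log_vnode_path(struct vnode *vp)
{
	char path[MAXPATHLEN];
	int len = sizeof(path);

	if (vn_getpath(vp, path, &len) == 0)
		printf("vnode 0x%x is %s (%d bytes)\n", (unsigned int)vp, path, len);
}
#endif	/* example only */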
/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct vfsconf *vfsp;
	int *username;
	u_int usernamelen;
	int error;

	/*
	 * VFS_NUMMNTOPS shouldn't be at name[0] since it
	 * is a VFS generic variable. So now we must check
	 * namelen so we don't end up covering any UFS
	 * variables (since UFS vfc_typenum is 1).
	 *
	 * It should have been:
	 *    name[0]:  VFS_GENERIC
	 *    name[1]:  VFS_NUMMNTOPS
	 */
	if (namelen == 1 && name[0] == VFS_NUMMNTOPS) {
		extern unsigned int vfs_nummntops;
		return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
	}

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (EISDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
			oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
			sizeof(struct vfsconf)));
	}
	/*
	 * We need to get back into the general MIB, so we need to re-prepend
	 * CTL_VFS to our name and try userland_sysctl().
	 */
	usernamelen = namelen + 1;
	MALLOC(username, int *, usernamelen * sizeof(*username),
		M_TEMP, M_WAITOK);
	bcopy(name, username + 1, namelen * sizeof(*name));
	username[0] = CTL_VFS;
	error = userland_sysctl(p, username, usernamelen, oldp, oldlenp, 1,
		newp, newlen, oldlenp);
	FREE(username, M_TEMP);
	return (error);
}
int kinfo_vdebug = 1;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) {
				return (error);
			}
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}
/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}
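
/*
 * Illustrative sketch (not part of the original file): mount-time code uses
 * vfs_mountedon() to refuse to mount a block device (or any of its aliases)
 * that already has a filesystem mounted on it.  The helper is hypothetical.
 */
#if 0	/* example only */
static int
example_check_mountable(struct vnode *devvp)
{
	int error;

	if ((error = vfs_mountedon(devvp)) != 0)
		return (error);		/* EBUSY: device already in use */
	return (0);
}
#endif	/* example only */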
/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
__private_extern__ void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p = current_proc();

	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		(void) dounmount(mp, MNT_FORCE, p);
	}
}
/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by vfs_export() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0) {
		/*
		 * One of the reasons that rnh_addaddr may fail is that
		 * the entry already exists. To check for this case, we
		 * look up the entry to see if it is there. If so, we
		 * do not need to make a new entry but do return success.
		 */
		_FREE(np, M_NETADDR);
		rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
		if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
		    ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
		    !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
			(caddr_t)&argp->ex_anon, sizeof(struct ucred)))
			return (0);
		return (EPERM);
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	_FREE(np, M_NETADDR);
	return (error);
}
/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	_FREE((caddr_t)rn, M_NETADDR);
	return (0);
}
/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
				(caddr_t)rnh);
			_FREE((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
						rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}
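
/*
 * Illustrative sketch (not part of the original file): an NFS-style server
 * asks vfs_export_lookup() for the export credentials that apply to a client
 * address before honouring a request.  The helper below is hypothetical.
 */
#if 0	/* example only */
static int
example_client_may_access(struct mount *mp, struct netexport *nep, struct mbuf *nam)
{
	struct netcred *np;

	np = vfs_export_lookup(mp, nep, nam);
	if (np == NULL)
		return (EACCES);	/* host not in the export list */
	return ((np->netc_exflags & MNT_EXRDONLY) ? EROFS : 0);
}
#endif	/* example only */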
/*
 * try to reclaim vnodes from the memory
 * object cache
 */
static int
vm_object_cache_reclaim(int count)
{
	int cnt;
	void vnode_pager_release_from_cache(int *);

	/* attempt to reclaim vnodes from VM object cache */
	cnt = count;
	vnode_pager_release_from_cache(&cnt);
	return (cnt);
}
/*
 * Release memory object reference held by inactive vnodes
 * and then try to reclaim some vnodes from the memory
 * object cache
 */
static int
vnreclaim(int count)
{
	int i, loopcnt;
	struct proc *p = current_proc();
	struct vnode *vp;
	int err;
	int didhold;

	i = 0;
	loopcnt = 0;

restart:
	/* Try to release "count" vnodes from the inactive list */
	if (++loopcnt > inactivevnodes) {
		/*
		 * I did my best trying to reclaim the vnodes.
		 * Do not try any more as that would only lead to
		 * long latencies. Also in the worst case
		 * this can get totally CPU bound.
		 * Just fall through and attempt a reclaim of VM
		 * object cache
		 */
		goto out;
	}

	simple_lock(&vnode_free_list_slock);
	for (vp = TAILQ_FIRST(&vnode_inactive_list);
	    (vp != NULLVP) && (i < count);
	    vp = TAILQ_NEXT(vp, v_freelist)) {

		if (!simple_lock_try(&vp->v_interlock))
			continue;

		if (vp->v_usecount != 1)
			panic("vnreclaim: v_usecount");

		if (!UBCINFOEXISTS(vp)) {
			if (vp->v_type == VBAD) {
				VREMINACTIVE("vnreclaim", vp);
				simple_unlock(&vp->v_interlock);
				continue;
			} else
				panic("non UBC vnode on inactive list");
			/* Should not reach here */
		}

		/* If vnode is already being reclaimed, wait */
		if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			simple_unlock(&vnode_free_list_slock);
			(void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
			goto restart;
		}

		/*
		 * if the vnode is being initialized,
		 * wait for it to finish initialization
		 */
		if (ISSET(vp->v_flag, VUINIT)) {
			SET(vp->v_flag, VUWANT);
			simple_unlock(&vp->v_interlock);
			continue;
		}

		VREMINACTIVE("vnreclaim", vp);
		simple_unlock(&vnode_free_list_slock);

		if (ubc_issetflags(vp, UI_WASMAPPED)) {
			/*
			 * We should not reclaim as it is likely
			 * to be in use. Let it die a natural death.
			 * Release the UBC reference if one exists
			 * and put it back at the tail.
			 */
			simple_unlock(&vp->v_interlock);
			if (ubc_release_named(vp)) {
				if (UBCINFOEXISTS(vp)) {
					simple_lock(&vp->v_interlock);
					if (vp->v_usecount == 1 && !VONLIST(vp))
						vinactive(vp);
					simple_unlock(&vp->v_interlock);
				}
			} else {
				simple_lock(&vp->v_interlock);
				vinactive(vp);
				simple_unlock(&vp->v_interlock);
			}
			goto restart;
		}

		VORECLAIM_ENABLE(vp);

		/*
		 * scrub the dirty pages and invalidate the buffers
		 */
		err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
		if (err) {
			/* cannot reclaim */
			simple_lock(&vp->v_interlock);
			vinactive(vp);
			VORECLAIM_DISABLE(vp);
			i++;
			simple_unlock(&vp->v_interlock);
			goto restart;
		}

		/* keep the vnode alive so we can kill it */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount != 1)
			panic("VOCR: usecount race");
		vp->v_usecount++;
		simple_unlock(&vp->v_interlock);

		/* clean up the state in VM without invalidating */
		didhold = ubc_hold(vp);
		if (didhold)
			(void)ubc_clean(vp, 0);

		/* flush and invalidate buffers associated with the vnode */
		if (vp->v_tag == VT_NFS)
			nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
		else
			vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

		/*
		 * Note: for the v_usecount == 2 case, VOP_INACTIVE
		 * has not yet been called.  Call it now while vp is
		 * still locked, it will also release the lock.
		 */
		if (vp->v_usecount == 2)
			VOP_INACTIVE(vp, p);
		else
			VOP_UNLOCK(vp, 0, p);

		if (didhold)
			ubc_rele(vp);

		/*
		 * destroy the ubc named reference.
		 * If we can't because it is held for I/Os
		 * in progress, just put it back on the inactive
		 * list and move on.  Otherwise, the paging reference
		 * is toast (and so is this vnode?).
		 */
		if (ubc_destroy_named(vp)) {
			i++;
		}
		simple_lock(&vp->v_interlock);
		VORECLAIM_DISABLE(vp);
		simple_unlock(&vp->v_interlock);
		vrele(vp);	/* release extra use we added here */

		/* inactive list lock was released, must restart */
		goto restart;
	}
	simple_unlock(&vnode_free_list_slock);

out:
	vnode_reclaim_tried += i;

	i = vm_object_cache_reclaim(count);
	vnode_objects_reclaimed += i;

	return (i);
}
/*
 * This routine is called from vnode_pager_no_senders()
 * which in turn can be called with vnode locked by vnode_uncache()
 * But it could also get called as a result of vm_object_cache_trim().
 * In that case lock state is unknown.
 * AGE the vnode so that it gets recycled quickly.
 * Check lock status to decide whether to call vput() or vrele().
 */
__private_extern__
void
vnode_pager_vrele(struct vnode *vp)
{
	boolean_t funnel_state;
	int isvnreclaim = 1;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	/* Mark the vnode to be recycled */
	vagevp(vp);

	simple_lock(&vp->v_interlock);
	/*
	 * If a vgone (or vclean) is already in progress,
	 * Do not bother with the ubc_info cleanup.
	 * Let the vclean deal with it.
	 */
	if (vp->v_flag & VXLOCK) {
		CLR(vp->v_flag, VTERMINATE);
		if (ISSET(vp->v_flag, VTERMWANT)) {
			CLR(vp->v_flag, VTERMWANT);
			wakeup((caddr_t)&vp->v_ubcinfo);
		}
		simple_unlock(&vp->v_interlock);
		(void) thread_funnel_set(kernel_flock, funnel_state);
		return;
	}

	/* It's dead, Jim! */
	if (!ISSET(vp->v_flag, VORECLAIM)) {
		/*
		 * called as a result of eviction of the memory
		 * object from the memory object cache
		 */
		isvnreclaim = 0;

		/* So serialize vnode operations */
		VORECLAIM_ENABLE(vp);
	}
	if (!ISSET(vp->v_flag, VTERMINATE))
		SET(vp->v_flag, VTERMINATE);

	if (UBCINFOEXISTS(vp)) {
		struct ubc_info *uip = vp->v_ubcinfo;

		if (ubc_issetflags(vp, UI_WASMAPPED))
			SET(vp->v_flag, VWASMAPPED);

		vp->v_ubcinfo = UBC_NOINFO;	/* catch bad accesses */
		simple_unlock(&vp->v_interlock);
		ubc_info_deallocate(uip);
	} else if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
	    && ((vp)->v_ubcinfo != UBC_NOINFO)) {
		struct ubc_info *uip = vp->v_ubcinfo;

		vp->v_ubcinfo = UBC_NOINFO;	/* catch bad accesses */
		simple_unlock(&vp->v_interlock);
		ubc_info_deallocate(uip);
	} else {
		simple_unlock(&vp->v_interlock);
	}

	CLR(vp->v_flag, VTERMINATE);

	if (vp->v_type != VBAD){
		vgone(vp);	/* revoke the vnode */
		vrele(vp);	/* and drop the reference */
	}

	if (ISSET(vp->v_flag, VTERMWANT)) {
		CLR(vp->v_flag, VTERMWANT);
		wakeup((caddr_t)&vp->v_ubcinfo);
	}
	if (!isvnreclaim)
		VORECLAIM_DISABLE(vp);
	(void) thread_funnel_set(kernel_flock, funnel_state);
	return;
}
#if DIAGNOSTIC
int walk_vnodes_debug = 0;

void
walk_allvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;
	int cnt;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (vp->v_usecount < 0){
				if(walk_vnodes_debug) {
					printf("vp is %x\n",vp);
				}
			}
		}
		nmp = mp->mnt_list.cqe_next;
	}
	for (cnt = 0, vp = vnode_free_list.tqh_first;
	     vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
		if ((vp->v_usecount < 0) && walk_vnodes_debug) {
			if(walk_vnodes_debug) {
				printf("vp is %x\n",vp);
			}
		}
	}
	printf("%d - free\n", cnt);

	for (cnt = 0, vp = vnode_inactive_list.tqh_first;
	     vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
		if ((vp->v_usecount < 0) && walk_vnodes_debug) {
			if(walk_vnodes_debug) {
				printf("vp is %x\n",vp);
			}
		}
	}
	printf("%d - inactive\n", cnt);
}
#endif /* DIAGNOSTIC */
struct x_constraints {
	u_int32_t x_maxreadcnt;
	u_int32_t x_maxsegreadsize;
	u_int32_t x_maxsegwritesize;
};
void
vfs_io_attributes(vp, flags, iosize, vectors)
	struct vnode	*vp;
	int	flags;	/* B_READ or B_WRITE */
	int	*iosize;
	int	*vectors;
{
	struct mount *mp;

	/* start with "reasonable" defaults */
	*iosize = MAXPHYS;
	*vectors = 32;

	mp = vp->v_mount;
	if (mp != NULL) {
		switch (flags) {
		case B_READ:
			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
				*iosize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt;
			else
				*iosize = mp->mnt_maxreadcnt;
			*vectors = mp->mnt_segreadcnt;
			break;
		case B_WRITE:
			*iosize = mp->mnt_maxwritecnt;
			*vectors = mp->mnt_segwritecnt;
			break;
		default:
			break;
		}
	}
	if (*iosize == 0)
		*iosize = MAXPHYS;
	return;
}
void
vfs_io_maxsegsize(vp, flags, maxsegsize)
	struct vnode	*vp;
	int	flags;	/* B_READ or B_WRITE */
	int	*maxsegsize;
{
	struct mount *mp;

	/* start with "reasonable" default */
	*maxsegsize = MAXPHYS;

	mp = vp->v_mount;
	if (mp != NULL) {
		switch (flags) {
		case B_READ:
			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
				*maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize;
			else
				/*
				 * if the extended info doesn't exist
				 * then use the maxread I/O size as the
				 * max segment size... this is the previous behavior
				 */
				*maxsegsize = mp->mnt_maxreadcnt;
			break;
		case B_WRITE:
			if (mp->mnt_kern_flag & MNTK_IO_XINFO)
				*maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize;
			else
				/*
				 * if the extended info doesn't exist
				 * then use the maxwrite I/O size as the
				 * max segment size... this is the previous behavior
				 */
				*maxsegsize = mp->mnt_maxwritecnt;
			break;
		default:
			break;
		}
	}
	if (*maxsegsize == 0)
		*maxsegsize = MAXPHYS;
	return;
}
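
/*
 * Illustrative sketch (not part of the original source): how a cluster I/O
 * or filesystem read path might consult the two routines above to size a
 * request.  The variable names are hypothetical; only the two calls and the
 * B_READ flag are taken from this file.
 */
#if 0
	int max_iosize, max_vectors, max_segsize;

	vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);
	vfs_io_maxsegsize(vp, B_READ, &max_segsize);

	/*
	 * issue reads of at most max_iosize bytes, built from at most
	 * max_vectors segments, none larger than max_segsize bytes
	 */
#endif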
#include <sys/disk.h>

int
vfs_init_io_attributes(devvp, mp)
	struct vnode	*devvp;
	struct mount	*mp;
{
	int error;
	off_t readblockcnt;
	off_t writeblockcnt;
	off_t readmaxcnt;
	off_t writemaxcnt;
	off_t readsegcnt;
	off_t writesegcnt;
	off_t readsegsize;
	off_t writesegsize;
	u_long blksize;
	u_int64_t temp;
	int isvirtual = 0;
	int thisunit = -1;
	struct proc *p = current_proc();
	struct ucred *cred = p->p_ucred;
	/*
	 * determine if this mount point exists on the same device as the root
	 * partition... if so, then it comes under the hard throttle control
	 */
	static int rootunit = -1;
	extern struct vnode *rootvp;

	if (rootunit == -1) {
		if (VOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, cred, p))
			rootunit = -1;
		else if (rootvp == devvp)
			mp->mnt_kern_flag |= MNTK_ROOTDEV;
	}
	if (devvp != rootvp && rootunit != -1) {
		if (VOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, cred, p) == 0) {
			if (thisunit == rootunit)
				mp->mnt_kern_flag |= MNTK_ROOTDEV;
		}
	}
	if (VOP_IOCTL(devvp, DKIOCGETISVIRTUAL, (caddr_t)&isvirtual, 0, cred, p) == 0) {
		if (isvirtual)
			mp->mnt_kern_flag |= MNTK_VIRTUALDEV;
	}

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
	    (caddr_t)&readblockcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
	    (caddr_t)&writeblockcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD,
	    (caddr_t)&readmaxcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE,
	    (caddr_t)&writemaxcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
	    (caddr_t)&readsegcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
	    (caddr_t)&writesegcnt, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD,
	    (caddr_t)&readsegsize, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE,
	    (caddr_t)&writesegsize, 0, cred, p)))
		return (error);

	if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
	    (caddr_t)&blksize, 0, cred, p)))
		return (error);

	if ( !(mp->mnt_kern_flag & MNTK_IO_XINFO)) {
		MALLOC(mp->mnt_xinfo_ptr, void *, sizeof(struct x_constraints), M_TEMP, M_WAITOK);
		mp->mnt_kern_flag |= MNTK_IO_XINFO;
	}

	if (readmaxcnt)
		temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt;
	else if (readblockcnt) {
		temp = readblockcnt * blksize;
		temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
	} else
		temp = MAXPHYS;
	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt = (u_int32_t)temp;

	if (writemaxcnt)
		temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt;
	else if (writeblockcnt) {
		temp = writeblockcnt * blksize;
		temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
	} else
		temp = MAXPHYS;
	mp->mnt_maxwritecnt = (u_int32_t)temp;

	if (readsegcnt) {
		temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
		mp->mnt_segreadcnt = (u_int16_t)temp;
	}
	if (writesegcnt) {
		temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
		mp->mnt_segwritecnt = (u_int16_t)temp;
	}
	if (readsegsize)
		temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize;
	else
		temp = mp->mnt_maxreadcnt;
	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize = (u_int32_t)temp;

	if (writesegsize)
		temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize;
	else
		temp = mp->mnt_maxwritecnt;
	((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize = (u_int32_t)temp;

	return (error);
}
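
/*
 * Illustrative sketch (not part of the original source): a filesystem's
 * mount routine would typically call vfs_init_io_attributes() against the
 * opened block device vnode so that the constraints gathered above become
 * available to vfs_io_attributes()/vfs_io_maxsegsize().  The surrounding
 * names and the error exit are hypothetical.
 */
#if 0
	/* devvp: the opened block device, mp: the mount being set up */
	if ((error = vfs_init_io_attributes(devvp, mp)))
		goto bad;	/* hypothetical error exit */
#endif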
static struct klist fs_klist;

void
vfs_event_init(void)
{
	klist_init(&fs_klist);
}

void
vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data)
{
	KNOTE(&fs_klist, event);
}
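
/*
 * Illustrative sketch (not part of the original source): a filesystem or
 * the unmount path could post a notification to EVFILT_FS listeners through
 * vfs_event_signal().  VQ_UNMOUNT is assumed to be one of the VQ_* event
 * bits from <sys/mount.h>; substitute whichever event applies.
 */
#if 0
	vfs_event_signal(&mp->mnt_stat.f_fsid, VQ_UNMOUNT, 0);
#endif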
/*
 * return the number of mounted filesystems.
 */
static int
sysctl_vfs_getvfscnt(void)
{
	int ret = 0;
	struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
		ret++;
	simple_unlock(&mountlist_slock);
	return (ret);
}
/*
 * fill in the array of fsid_t's up to a max of 'count', the actual
 * number filled in will be set in '*actual'.  If there are more fsid_t's
 * than room in fsidlst then ENOMEM will be returned and '*actual' will
 * have the actual count.
 * having *actual filled out even in the error case is depended upon.
 */
static int
sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual)
{
	struct mount *mp;

	*actual = 0;
	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		(*actual)++;
		if (*actual <= count)
			fsidlst[(*actual) - 1] = mp->mnt_stat.f_fsid;
	}
	simple_unlock(&mountlist_slock);
	return (*actual <= count ? 0 : ENOMEM);
}
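
/*
 * Illustrative sketch (not part of the original source): userland usage of
 * the vfs.generic.vfsidlist node exported below.  This assumes the node can
 * be resolved by name with sysctlbyname(3); the sizing call followed by the
 * data call mirrors the handler's "query, then copy out" behavior.
 */
#if 0
	/* userland, not kernel code */
	size_t len = 0;
	fsid_t *ids;

	if (sysctlbyname("vfs.generic.vfsidlist", NULL, &len, NULL, 0) == 0 &&
	    (ids = malloc(len)) != NULL &&
	    sysctlbyname("vfs.generic.vfsidlist", ids, &len, NULL, 0) == 0) {
		int nmounts = len / sizeof(fsid_t);
		/* ... use ids[0..nmounts-1] ... */
	}
#endif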
static int
sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS
{
	int actual, error;
	size_t space;
	fsid_t *fsidlst;

	/* This is a readonly node. */
	if (req->newptr != NULL)
		return (EPERM);

	/* they are querying us so just return the space required. */
	if (req->oldptr == NULL) {
		req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
		return (0);
	}
again:
	/*
	 * Retrieve an accurate count of the amount of space required to copy
	 * out all the fsids in the system.
	 */
	space = req->oldlen;
	req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t);

	/* they didn't give us enough space. */
	if (space < req->oldlen)
		return (ENOMEM);
	MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK);
	error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t),
	    &actual);
	/*
	 * If we get back ENOMEM, then another mount has been added while we
	 * slept in malloc above.  If this is the case then try again.
	 */
	if (error == ENOMEM) {
		FREE(fsidlst, M_TEMP);
		req->oldlen = space;
		goto again;
	}
	if (error == 0)
		error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t));
	FREE(fsidlst, M_TEMP);
	return (error);
}
/*
 * Do a sysctl by fsid.
 */
static int
sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS
{
	struct vfsidctl vc;
	struct mount *mp;
	struct statfs *sp;
	struct proc *p;
	int *name;
	int error, flags, namelen;

	name = arg1;
	namelen = arg2;
	p = req->p;

	error = SYSCTL_IN(req, &vc, sizeof(vc));
	if (error)
		return (error);
	if (vc.vc_vers != VFS_CTL_VERS1)
		return (EINVAL);
	mp = vfs_getvfs(&vc.vc_fsid);
	if (mp == NULL)
		return (ENOENT);
	/* reset so that the fs specific code can fetch it. */
	req->newidx = 0;
	/*
	 * Note if this is a VFS_CTL then we pass the actual sysctl req
	 * in for "oldp" so that the lower layer can DTRT and use the
	 * SYSCTL_IN/OUT routines.
	 */
	if (mp->mnt_op->vfs_sysctl != NULL) {
		error = mp->mnt_op->vfs_sysctl(name, namelen,
		    req, NULL, NULL, 0, req->p);
		if (error != EOPNOTSUPP)
			return (error);
	}
	switch (name[0]) {
	case VFS_CTL_UMOUNT:
		VCTLTOREQ(&vc, req);
		error = SYSCTL_IN(req, &flags, sizeof(flags));
		if (error)
			break;
		error = safedounmount(mp, flags, p);
		break;
	case VFS_CTL_STATFS:
		VCTLTOREQ(&vc, req);
		error = SYSCTL_IN(req, &flags, sizeof(flags));
		if (error)
			break;
		sp = &mp->mnt_stat;
		if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, p)))
			return (error);
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		error = SYSCTL_OUT(req, sp, sizeof(*sp));
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (error);
}
static int	filt_fsattach(struct knote *kn);
static void	filt_fsdetach(struct knote *kn);
static int	filt_fsevent(struct knote *kn, long hint);

struct filterops fs_filtops =
	{ 0, filt_fsattach, filt_fsdetach, filt_fsevent };

static int
filt_fsattach(struct knote *kn)
{

	kn->kn_flags |= EV_CLEAR;
	KNOTE_ATTACH(&fs_klist, kn);
	return (0);
}

static void
filt_fsdetach(struct knote *kn)
{

	KNOTE_DETACH(&fs_klist, kn);
}

static int
filt_fsevent(struct knote *kn, long hint)
{

	kn->kn_fflags |= hint;
	return (kn->kn_fflags != 0);
}
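
/*
 * Illustrative sketch (not part of the original source): userland code can
 * watch for filesystem events delivered through fs_klist by registering an
 * EVFILT_FS knote; kn_fflags accumulates the event bits passed to
 * vfs_event_signal() above.
 */
#if 0
	/* userland, not kernel code */
	int kq = kqueue();
	struct kevent kev;

	EV_SET(&kev, 0, EVFILT_FS, EV_ADD | EV_CLEAR, 0, 0, NULL);
	(void)kevent(kq, &kev, 1, NULL, 0, NULL);	/* register interest */

	(void)kevent(kq, NULL, 0, &kev, 1, NULL);	/* kev.fflags holds the posted event bits */
#endif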
static int
sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS
{
	int out, error;
	pid_t pid;
	struct proc *p;

	/* We need a pid. */
	if (req->newptr == NULL)
		return (EINVAL);

	error = SYSCTL_IN(req, &pid, sizeof(pid));
	if (error)
		return (error);

	p = pfind(pid < 0 ? -pid : pid);
	if (p == NULL)
		return (ESRCH);

	/*
	 * Fetching the value is ok, but we only fetch if the old
	 * pointer is given.
	 */
	if (req->oldptr != NULL) {
		out = !((p->p_flag & P_NOREMOTEHANG) == 0);
		error = SYSCTL_OUT(req, &out, sizeof(out));
		if (error)
			return (error);
	}

	/* cansignal offers us enough security. */
	if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0)
		return (EPERM);

	if (pid < 0)
		p->p_flag &= ~P_NOREMOTEHANG;
	else
		p->p_flag |= P_NOREMOTEHANG;

	return (0);
}
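
/*
 * Illustrative sketch (not part of the original source): userland usage of
 * the vfs.generic.noremotehang node declared below.  A positive pid sets
 * P_NOREMOTEHANG for that process, a negative pid clears it.  This assumes
 * the node can be resolved by name with sysctlbyname(3).
 */
#if 0
	/* userland, not kernel code */
	pid_t pid = getpid();	/* use -pid to clear the flag again */

	(void)sysctlbyname("vfs.generic.noremotehang", NULL, NULL,
	    &pid, sizeof(pid));
#endif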
/* the vfs.generic. branch. */
SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge");
/* retrieve a list of mounted filesystem fsid_t */
SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD,
    0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids");
/* perform operations on filesystem via fsid_t */
SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW,
    sysctl_vfs_ctlbyfsid, "ctlbyfsid");
SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW,
    0, 0, sysctl_vfs_noremotehang, "I", "noremotehang");