[apple/xnu.git] / bsd / vfs / vfs_subr.c
1 /*
2 * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1989, 1993
25 * The Regents of the University of California. All rights reserved.
26 * (c) UNIX System Laboratories, Inc.
27 * All or some portions of this file are derived from material licensed
28 * to the University of California by American Telephone and Telegraph
29 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
30 * the permission of UNIX System Laboratories, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
61 */
62
63 /*
64 * External virtual filesystem routines
65 */
66
67 #undef DIAGNOSTIC
68 #define DIAGNOSTIC 1
69
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/proc.h>
73 #include <sys/mount.h>
74 #include <sys/time.h>
75 #include <sys/vnode.h>
76 #include <sys/stat.h>
77 #include <sys/namei.h>
78 #include <sys/ucred.h>
79 #include <sys/buf.h>
80 #include <sys/errno.h>
81 #include <sys/malloc.h>
82 #include <sys/domain.h>
83 #include <sys/mbuf.h>
84 #include <sys/syslog.h>
85 #include <sys/ubc.h>
86 #include <sys/vm.h>
87 #include <sys/sysctl.h>
88
89 #include <kern/assert.h>
90
91 #include <miscfs/specfs/specdev.h>
92
93 #include <mach/mach_types.h>
94 #include <mach/memory_object_types.h>
95
96
97 enum vtype iftovt_tab[16] = {
98 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
99 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
100 };
101 int vttoif_tab[9] = {
102 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
103 S_IFSOCK, S_IFIFO, S_IFMT,
104 };
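/*
 * These tables map between the S_IFMT file-type bits of a mode and the
 * vnode enum vtype, and back.  Illustrative use (a sketch; the IFTOVT()
 * and VTTOIF() macros in sys/vnode.h are thin wrappers over these lookups):
 *
 *	enum vtype t = iftovt_tab[((mode) & S_IFMT) >> 12];	// mode -> vtype
 *	int	   m = vttoif_tab[(int)vp->v_type];		// vtype -> S_IFMT bits
 */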
105
106 static void vfree(struct vnode *vp);
107 static void vinactive(struct vnode *vp);
108 static int vnreclaim(int count);
109 extern kern_return_t
110 adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
111
112 TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
113 TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */
114 struct mntlist mountlist; /* mounted filesystem list */
115
116 #if DIAGNOSTIC
117 #define VLISTCHECK(fun, vp, list) \
118 if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
119 panic("%s: %s vnode not on %slist", (fun), (list), (list));
120
121 #define VINACTIVECHECK(fun, vp, expected) \
122 do { \
123 int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE); \
124 if (__is_inactive ^ expected) \
125 panic("%s: %sinactive vnode, expected %s", (fun), \
126 __is_inactive? "" : "not ", \
127 expected? "inactive": "not inactive"); \
128 } while(0)
129 #else
130 #define VLISTCHECK(fun, vp, list)
131 #define VINACTIVECHECK(fun, vp, expected)
132 #endif /* DIAGNOSTIC */
133
134 #define VLISTNONE(vp) \
135 do { \
136 (vp)->v_freelist.tqe_next = (struct vnode *)0; \
137 (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb; \
138 } while(0)
139
140 #define VONLIST(vp) \
141 ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
142
143 /* remove a vnode from free vnode list */
144 #define VREMFREE(fun, vp) \
145 do { \
146 VLISTCHECK((fun), (vp), "free"); \
147 TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist); \
148 VLISTNONE((vp)); \
149 freevnodes--; \
150 } while(0)
151
152 /* remove a vnode from inactive vnode list */
153 #define VREMINACTIVE(fun, vp) \
154 do { \
155 VLISTCHECK((fun), (vp), "inactive"); \
156 VINACTIVECHECK((fun), (vp), VUINACTIVE); \
157 TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \
158 CLR((vp)->v_flag, VUINACTIVE); \
159 VLISTNONE((vp)); \
160 inactivevnodes--; \
161 } while(0)
162
163 #define VORECLAIM_ENABLE(vp) \
164 do { \
165 if (ISSET((vp)->v_flag, VORECLAIM)) \
166 panic("vm object raclaim already"); \
167 SET((vp)->v_flag, VORECLAIM); \
168 } while(0)
169
170 #define VORECLAIM_DISABLE(vp) \
171 do { \
172 CLR((vp)->v_flag, VORECLAIM); \
173 if (ISSET((vp)->v_flag, VXWANT)) { \
174 CLR((vp)->v_flag, VXWANT); \
175 wakeup((caddr_t)(vp)); \
176 } \
177 } while(0)
178
179 /*
180 * Have to declare first two locks as actual data even if !MACH_SLOCKS, since
181  * pointers to them get passed around.
182 */
183 simple_lock_data_t mountlist_slock;
184 simple_lock_data_t mntvnode_slock;
185 decl_simple_lock_data(,mntid_slock);
186 decl_simple_lock_data(,vnode_free_list_slock);
187 decl_simple_lock_data(,spechash_slock);
188
189 /*
190  * vnodetarget is the number of vnodes we expect to get back
191  * from the inactive vnode list and VM object cache.
192  * As vnreclaim() is a mainly CPU-bound operation, on faster
193  * processors this number could be higher.
194 * Having this number too high introduces longer delays in
195 * the execution of getnewvnode().
196 */
197 unsigned long vnodetarget; /* target for vnreclaim() */
198 #define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */
199
200 /*
201 * We need quite a few vnodes on the free list to sustain the
202  * rapid stat() calls the compilation process makes, and still benefit from the name
203 * cache. Having too few vnodes on the free list causes serious disk
204 * thrashing as we cycle through them.
205 */
206 #define VNODE_FREE_MIN		300	/* freelist should have at least this many */
207
208 /*
209 * We need to get vnodes back from the VM object cache when a certain #
210 * of vnodes are reused from the freelist. This is essential for the
211 * caching to be effective in the namecache and the buffer cache [for the
212 * metadata].
213 */
214 #define VNODE_TOOMANY_REUSED (VNODE_FREE_MIN/4)
215
216 /*
217 * If we have enough vnodes on the freelist we do not want to reclaim
218 * the vnodes from the VM object cache.
219 */
220 #define VNODE_FREE_ENOUGH (VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
221
222 /*
223 * Initialize the vnode management data structures.
224 */
225 __private_extern__ void
226 vntblinit()
227 {
228 extern struct lock__bsd__ exchangelock;
229
230 simple_lock_init(&mountlist_slock);
231 simple_lock_init(&mntvnode_slock);
232 simple_lock_init(&mntid_slock);
233 simple_lock_init(&spechash_slock);
234 TAILQ_INIT(&vnode_free_list);
235 simple_lock_init(&vnode_free_list_slock);
236 TAILQ_INIT(&vnode_inactive_list);
237 CIRCLEQ_INIT(&mountlist);
238 lockinit(&exchangelock, PVFS, "exchange", 0, 0);
239
240 if (!vnodetarget)
241 vnodetarget = VNODE_FREE_TARGET;
242
243 /*
244 	 * Scale the vm_object_cache to accommodate the vnodes
245 * we want to cache
246 */
247 (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
248 }
249
250 /* Reset the VM Object Cache with the values passed in */
251 __private_extern__ kern_return_t
252 reset_vmobjectcache(unsigned int val1, unsigned int val2)
253 {
254 vm_size_t oval = val1 - VNODE_FREE_MIN;
255 vm_size_t nval;
256
257 if(val2 < VNODE_FREE_MIN)
258 nval = 0;
259 else
260 nval = val2 - VNODE_FREE_MIN;
261
262 return(adjust_vm_object_cache(oval, nval));
263 }
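/*
 * Illustrative (hypothetical) caller: when the administrator changes the
 * desired vnode count (for example through a kern.maxvnodes style sysctl),
 * the old and new values are handed to reset_vmobjectcache() so the VM
 * object cache is rescaled to match:
 *
 *	oldval = desiredvnodes;
 *	desiredvnodes = newval;
 *	(void) reset_vmobjectcache(oldval, desiredvnodes);
 */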
264
265 /*
266 * Mark a mount point as busy. Used to synchronize access and to delay
267 * unmounting. Interlock is not released on failure.
268 */
269 int
270 vfs_busy(mp, flags, interlkp, p)
271 struct mount *mp;
272 int flags;
273 struct slock *interlkp;
274 struct proc *p;
275 {
276 int lkflags;
277
278 if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
279 if (flags & LK_NOWAIT)
280 return (ENOENT);
281 mp->mnt_kern_flag |= MNTK_MWAIT;
282 if (interlkp)
283 simple_unlock(interlkp);
284 /*
285 * Since all busy locks are shared except the exclusive
286 * lock granted when unmounting, the only place that a
287 * wakeup needs to be done is at the release of the
288 * exclusive lock at the end of dounmount.
289 */
290 sleep((caddr_t)mp, PVFS);
291 if (interlkp)
292 simple_lock(interlkp);
293 return (ENOENT);
294 }
295 lkflags = LK_SHARED;
296 if (interlkp)
297 lkflags |= LK_INTERLOCK;
298 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
299 panic("vfs_busy: unexpected lock failure");
300 return (0);
301 }
302
303 /*
304 * Free a busy filesystem.
305 */
306 void
307 vfs_unbusy(mp, p)
308 struct mount *mp;
309 struct proc *p;
310 {
311
312 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
313 }
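/*
 * Sketch of the usual vfs_busy()/vfs_unbusy() traversal idiom (the same
 * pattern appears later in this file, e.g. in sysctl_vnode()):
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 *			nmp = mp->mnt_list.cqe_next;	// still hold the interlock
 *			continue;
 *		}
 *		// ... operate on the busied mount point ...
 *		simple_lock(&mountlist_slock);
 *		nmp = mp->mnt_list.cqe_next;
 *		vfs_unbusy(mp, p);
 *	}
 *	simple_unlock(&mountlist_slock);
 */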
314
315 /*
316 * Lookup a filesystem type, and if found allocate and initialize
317 * a mount structure for it.
318 *
319 * Devname is usually updated by mount(8) after booting.
320 */
321 int
322 vfs_rootmountalloc(fstypename, devname, mpp)
323 char *fstypename;
324 char *devname;
325 struct mount **mpp;
326 {
327 struct proc *p = current_proc(); /* XXX */
328 struct vfsconf *vfsp;
329 struct mount *mp;
330
331 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
332 if (!strcmp(vfsp->vfc_name, fstypename))
333 break;
334 if (vfsp == NULL)
335 return (ENODEV);
336 mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
337 bzero((char *)mp, (u_long)sizeof(struct mount));
338
339 /* Initialize the default IO constraints */
340 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
341 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
342
343 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
344 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
345 LIST_INIT(&mp->mnt_vnodelist);
346 mp->mnt_vfc = vfsp;
347 mp->mnt_op = vfsp->vfc_vfsops;
348 mp->mnt_flag = MNT_RDONLY;
349 mp->mnt_vnodecovered = NULLVP;
350 vfsp->vfc_refcount++;
351 mp->mnt_stat.f_type = vfsp->vfc_typenum;
352 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
353 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
354 mp->mnt_stat.f_mntonname[0] = '/';
355 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
356 *mpp = mp;
357 return (0);
358 }
359
360 /*
361 * Find an appropriate filesystem to use for the root. If a filesystem
362 * has not been preselected, walk through the list of known filesystems
363 * trying those that have mountroot routines, and try them until one
364 * works or we have tried them all.
365 */
366 int
367 vfs_mountroot()
368 {
369 struct vfsconf *vfsp;
370 extern int (*mountroot)(void);
371 int error;
372
373 if (mountroot != NULL) {
374 error = (*mountroot)();
375 return (error);
376 }
377
378 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
379 if (vfsp->vfc_mountroot == NULL)
380 continue;
381 if ((error = (*vfsp->vfc_mountroot)()) == 0)
382 return (0);
383 if (error != EINVAL)
384 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
385 }
386 return (ENODEV);
387 }
388
389 /*
390 * Lookup a mount point by filesystem identifier.
391 */
392 struct mount *
393 vfs_getvfs(fsid)
394 fsid_t *fsid;
395 {
396 register struct mount *mp;
397
398 simple_lock(&mountlist_slock);
399 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
400 mp = mp->mnt_list.cqe_next) {
401 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
402 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
403 simple_unlock(&mountlist_slock);
404 return (mp);
405 }
406 }
407 simple_unlock(&mountlist_slock);
408 return ((struct mount *)0);
409 }
410
411 /*
412 * Get a new unique fsid
413 */
414 void
415 vfs_getnewfsid(mp)
416 struct mount *mp;
417 {
418 static u_short xxxfs_mntid;
419
420 fsid_t tfsid;
421 int mtype;
422
423 simple_lock(&mntid_slock);
424 mtype = mp->mnt_vfc->vfc_typenum;
425 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
426 mp->mnt_stat.f_fsid.val[1] = mtype;
427 if (xxxfs_mntid == 0)
428 ++xxxfs_mntid;
429 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
430 tfsid.val[1] = mtype;
431 if (mountlist.cqh_first != (void *)&mountlist) {
432 while (vfs_getvfs(&tfsid)) {
433 tfsid.val[0]++;
434 xxxfs_mntid++;
435 }
436 }
437 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
438 simple_unlock(&mntid_slock);
439 }
440
441 /*
442 * Set vnode attributes to VNOVAL
443 */
444 void
445 vattr_null(vap)
446 register struct vattr *vap;
447 {
448
449 vap->va_type = VNON;
450 vap->va_size = vap->va_bytes = VNOVAL;
451 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
452 vap->va_fsid = vap->va_fileid =
453 vap->va_blocksize = vap->va_rdev =
454 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
455 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
456 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
457 vap->va_flags = vap->va_gen = VNOVAL;
458 vap->va_vaflags = 0;
459 }
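/*
 * Illustrative use (a sketch): a caller changing a single attribute
 * initializes the whole structure to VNOVAL first, so the filesystem
 * only acts on the fields that were explicitly set (VATTR_NULL() is
 * the usual wrapper around vattr_null()):
 *
 *	struct vattr va;
 *
 *	VATTR_NULL(&va);
 *	va.va_size = length;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */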
460
461 /*
462 * Routines having to do with the management of the vnode table.
463 */
464 extern int (**dead_vnodeop_p)(void *);
465 static void vclean __P((struct vnode *vp, int flag, struct proc *p));
466 extern void vgonel __P((struct vnode *vp, struct proc *p));
467 long numvnodes, freevnodes;
468 long inactivevnodes;
469 long vnode_reclaim_tried;
470 long vnode_objects_reclaimed;
471
472
473 extern struct vattr va_null;
474
475 /*
476 * Return the next vnode from the free list.
477 */
478 int
479 getnewvnode(tag, mp, vops, vpp)
480 enum vtagtype tag;
481 struct mount *mp;
482 int (**vops)(void *);
483 struct vnode **vpp;
484 {
485 struct proc *p = current_proc(); /* XXX */
486 struct vnode *vp;
487 int cnt, didretry = 0;
488 static int reused = 0; /* track the reuse rate */
489 int reclaimhits = 0;
490
491 retry:
492 simple_lock(&vnode_free_list_slock);
493 /*
494 * MALLOC a vnode if the number of vnodes has not reached the desired
495 * value and the number on the free list is still reasonable...
496 	 * otherwise reuse from the freelist, even though we may evict a name cache
497 	 * entry, to reduce the number of vnodes that accumulate... vnodes tie up
498 	 * wired memory and are never garbage collected.
499 */
500 if (numvnodes < desiredvnodes && (freevnodes < (2 * VNODE_FREE_MIN))) {
501 numvnodes++;
502 simple_unlock(&vnode_free_list_slock);
503 MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
504 bzero((char *)vp, sizeof *vp);
505 VLISTNONE(vp); /* avoid double queue removal */
506 simple_lock_init(&vp->v_interlock);
507 goto done;
508 }
509
510 /*
511 * Once the desired number of vnodes are allocated,
512 * we start reusing the vnodes.
513 */
514 if (freevnodes < VNODE_FREE_MIN) {
515 /*
516 * if we are low on vnodes on the freelist attempt to get
517 * some back from the inactive list and VM object cache
518 */
519 simple_unlock(&vnode_free_list_slock);
520 (void)vnreclaim(vnodetarget);
521 simple_lock(&vnode_free_list_slock);
522 }
523 if (numvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED) {
524 reused = 0;
525 if (freevnodes < VNODE_FREE_ENOUGH) {
526 simple_unlock(&vnode_free_list_slock);
527 (void)vnreclaim(vnodetarget);
528 simple_lock(&vnode_free_list_slock);
529 }
530 }
531
532 for (cnt = 0, vp = vnode_free_list.tqh_first;
533 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
534 if (simple_lock_try(&vp->v_interlock)) {
535 /* got the interlock */
536 if (ISSET(vp->v_flag, VORECLAIM)) {
537 /* skip over the vnodes that are being reclaimed */
538 simple_unlock(&vp->v_interlock);
539 reclaimhits++;
540 } else
541 break;
542 }
543 }
544
545 /*
546 * Unless this is a bad time of the month, at most
547 * the first NCPUS items on the free list are
548 * locked, so this is close enough to being empty.
549 */
550 if (vp == NULLVP) {
551 simple_unlock(&vnode_free_list_slock);
552 if (!(didretry++) && (vnreclaim(vnodetarget) > 0))
553 goto retry;
554 tablefull("vnode");
555 log(LOG_EMERG, "%d vnodes locked, %d desired, %d numvnodes, "
556 "%d free, %d inactive, %d being reclaimed\n",
557 cnt, desiredvnodes, numvnodes, freevnodes, inactivevnodes,
558 reclaimhits);
559 *vpp = 0;
560 return (ENFILE);
561 }
562
563 if (vp->v_usecount)
564 panic("free vnode isn't: v_type = %d, v_usecount = %d?",
565 vp->v_type, vp->v_usecount);
566
567 VREMFREE("getnewvnode", vp);
568 reused++;
569 simple_unlock(&vnode_free_list_slock);
570 vp->v_lease = NULL;
571 cache_purge(vp);
572 if (vp->v_type != VBAD)
573 vgonel(vp, p); /* clean and reclaim the vnode */
574 else
575 simple_unlock(&vp->v_interlock);
576 #if DIAGNOSTIC
577 if (vp->v_data)
578 panic("cleaned vnode isn't");
579 {
580 int s = splbio();
581 if (vp->v_numoutput)
582 panic("Clean vnode has pending I/O's");
583 splx(s);
584 }
585 #endif
586 if (UBCINFOEXISTS(vp))
587 panic("getnewvnode: ubcinfo not cleaned");
588 else
589 vp->v_ubcinfo = 0;
590
591 vp->v_lastr = -1;
592 vp->v_ralen = 0;
593 vp->v_maxra = 0;
594 vp->v_lastw = 0;
595 vp->v_ciosiz = 0;
596 vp->v_cstart = 0;
597 vp->v_clen = 0;
598 vp->v_socket = 0;
599
600 done:
601 vp->v_flag = VSTANDARD;
602 vp->v_type = VNON;
603 vp->v_tag = tag;
604 vp->v_op = vops;
605 insmntque(vp, mp);
606 *vpp = vp;
607 vp->v_usecount = 1;
608 vp->v_data = 0;
609 return (0);
610 }
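/*
 * Illustrative caller pattern (a sketch, not taken from this file): a
 * filesystem allocates a vnode for a newly read in-core inode roughly as
 *
 *	error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp);
 *	if (error)
 *		return (error);
 *	vp->v_data = ip;	// hang fs-private data off the vnode
 *	vp->v_type = ...;	// set from the on-disk mode
 *
 * getnewvnode() hands back the vnode with v_usecount == 1 and v_data == 0.
 */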
611
612 /*
613 * Move a vnode from one mount queue to another.
614 */
615 void
616 insmntque(vp, mp)
617 struct vnode *vp;
618 struct mount *mp;
619 {
620
621 simple_lock(&mntvnode_slock);
622 /*
623 * Delete from old mount point vnode list, if on one.
624 */
625 if (vp->v_mount != NULL)
626 LIST_REMOVE(vp, v_mntvnodes);
627 /*
628 * Insert into list of vnodes for the new mount point, if available.
629 */
630 if ((vp->v_mount = mp) != NULL)
631 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
632 simple_unlock(&mntvnode_slock);
633 }
634
635 __inline void
636 vpwakeup(struct vnode *vp)
637 {
638 if (vp) {
639 if (--vp->v_numoutput < 0)
640 panic("vpwakeup: neg numoutput");
641 if ((vp->v_flag & VBWAIT || vp->v_flag & VTHROTTLED)
642 && vp->v_numoutput <= 0) {
643 vp->v_flag &= ~(VBWAIT|VTHROTTLED);
644 wakeup((caddr_t)&vp->v_numoutput);
645 }
646 }
647 }
648
649 /*
650 * Update outstanding I/O count and do wakeup if requested.
651 */
652 void
653 vwakeup(bp)
654 register struct buf *bp;
655 {
656 CLR(bp->b_flags, B_WRITEINPROG);
657 vpwakeup(bp->b_vp);
658 }
659
660 /*
661 * Flush out and invalidate all buffers associated with a vnode.
662 * Called with the underlying object locked.
663 */
664 int
665 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
666 register struct vnode *vp;
667 int flags;
668 struct ucred *cred;
669 struct proc *p;
670 int slpflag, slptimeo;
671 {
672 register struct buf *bp;
673 struct buf *nbp, *blist;
674 int s, error = 0;
675
676 if (flags & V_SAVE) {
677 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
678 return (error);
679 }
680
681 // XXXdbg - if there are dirty bufs, wait for 'em if they're busy
682 for (bp=vp->v_dirtyblkhd.lh_first; bp; bp=nbp) {
683 nbp = bp->b_vnbufs.le_next;
684 if (ISSET(bp->b_flags, B_BUSY)) {
685 SET(bp->b_flags, B_WANTED);
686 tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), "vinvalbuf", 0);
687 nbp = vp->v_dirtyblkhd.lh_first;
688 } else {
689 panic("vinvalbuf: dirty buf (vp 0x%x, bp 0x%x)", vp, bp);
690 }
691 }
692 }
693
694 for (;;) {
695 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
696 while (blist && blist->b_lblkno < 0)
697 blist = blist->b_vnbufs.le_next;
698 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
699 (flags & V_SAVEMETA))
700 while (blist && blist->b_lblkno < 0)
701 blist = blist->b_vnbufs.le_next;
702 if (!blist)
703 break;
704
705 for (bp = blist; bp; bp = nbp) {
706 nbp = bp->b_vnbufs.le_next;
707 if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
708 continue;
709 s = splbio();
710 if (ISSET(bp->b_flags, B_BUSY)) {
711 SET(bp->b_flags, B_WANTED);
712 error = tsleep((caddr_t)bp,
713 slpflag | (PRIBIO + 1), "vinvalbuf",
714 slptimeo);
715 splx(s);
716 if (error) {
717 return (error);
718 }
719 break;
720 }
721 bremfree(bp);
722 SET(bp->b_flags, B_BUSY);
723 splx(s);
724 /*
725 * XXX Since there are no node locks for NFS, I believe
726 * there is a slight chance that a delayed write will
727 * occur while sleeping just above, so check for it.
728 */
729 if (ISSET(bp->b_flags, B_DELWRI) && (flags & V_SAVE)) {
730 (void) VOP_BWRITE(bp);
731 break;
732 }
733
734 if (bp->b_flags & B_LOCKED) {
735 panic("vinvalbuf: bp @ 0x%x is locked!\n", bp);
736 break;
737 } else {
738 SET(bp->b_flags, B_INVAL);
739 }
740 brelse(bp);
741 }
742 }
743 if (!(flags & V_SAVEMETA) &&
744 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
745 panic("vinvalbuf: flush failed");
746 return (0);
747 }
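/*
 * Illustrative use (a sketch): before recycling or truncating a vnode,
 * callers flush its buffers, writing dirty data out first if it must be
 * preserved:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);	// sync, then invalidate
 *	error = vinvalbuf(vp, 0, cred, p, 0, 0);	// just throw them away
 *
 * vclean() below uses the V_SAVE form.
 */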
748
749 /*
750 * Create a vnode for a block device.
751 * Used for root filesystem, argdev, and swap areas.
752 * Also used for memory file system special devices.
753 */
754 int
755 bdevvp(dev, vpp)
756 dev_t dev;
757 struct vnode **vpp;
758 {
759 register struct vnode *vp;
760 struct vnode *nvp;
761 int error;
762
763 if (dev == NODEV) {
764 *vpp = NULLVP;
765 return (ENODEV);
766 }
767 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
768 if (error) {
769 *vpp = NULLVP;
770 return (error);
771 }
772 vp = nvp;
773 vp->v_type = VBLK;
774 if (nvp = checkalias(vp, dev, (struct mount *)0)) {
775 vput(vp);
776 vp = nvp;
777 }
778 *vpp = vp;
779 return (0);
780 }
781
782 /*
783 * Check to see if the new vnode represents a special device
784 * for which we already have a vnode (either because of
785 * bdevvp() or because of a different vnode representing
786 * the same block device). If such an alias exists, deallocate
787 * the existing contents and return the aliased vnode. The
788 * caller is responsible for filling it with its new contents.
789 */
790 struct vnode *
791 checkalias(nvp, nvp_rdev, mp)
792 register struct vnode *nvp;
793 dev_t nvp_rdev;
794 struct mount *mp;
795 {
796 struct proc *p = current_proc(); /* XXX */
797 struct vnode *vp;
798 struct vnode **vpp;
799 struct specinfo * bufhold;
800 int buffree = 1;
801
802 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
803 return (NULLVP);
804
805 bufhold = (struct specinfo *)_MALLOC_ZONE(sizeof(struct specinfo),
806 M_VNODE, M_WAITOK);
807 vpp = &speclisth[SPECHASH(nvp_rdev)];
808 loop:
809 simple_lock(&spechash_slock);
810 for (vp = *vpp; vp; vp = vp->v_specnext) {
811 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
812 continue;
813 /*
814 * Alias, but not in use, so flush it out.
815 */
816 simple_lock(&vp->v_interlock);
817 if (vp->v_usecount == 0) {
818 simple_unlock(&spechash_slock);
819 vgonel(vp, p);
820 goto loop;
821 }
822 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
823 simple_unlock(&spechash_slock);
824 goto loop;
825 }
826 break;
827 }
828 if (vp == NULL || vp->v_tag != VT_NON) {
829 nvp->v_specinfo = bufhold;
830 buffree = 0; /* buffer used */
831 bzero(nvp->v_specinfo, sizeof(struct specinfo));
832 nvp->v_rdev = nvp_rdev;
833 nvp->v_hashchain = vpp;
834 nvp->v_specnext = *vpp;
835 nvp->v_specflags = 0;
836 simple_unlock(&spechash_slock);
837 *vpp = nvp;
838 if (vp != NULLVP) {
839 nvp->v_flag |= VALIASED;
840 vp->v_flag |= VALIASED;
841 vput(vp);
842 }
843 /* Since buffer is used just return */
844 return (NULLVP);
845 }
846 simple_unlock(&spechash_slock);
847 VOP_UNLOCK(vp, 0, p);
848 simple_lock(&vp->v_interlock);
849 vclean(vp, 0, p);
850 vp->v_op = nvp->v_op;
851 vp->v_tag = nvp->v_tag;
852 nvp->v_type = VNON;
853 insmntque(vp, mp);
854 if (buffree)
855 _FREE_ZONE((void *)bufhold, sizeof (struct specinfo), M_VNODE);
856 return (vp);
857 }
858
859 /*
860 * Get a reference on a particular vnode and lock it if requested.
861 * If the vnode was on the inactive list, remove it from the list.
862 * If the vnode was on the free list, remove it from the list and
863 * move it to inactive list as needed.
864 * The vnode lock bit is set if the vnode is being eliminated in
865 * vgone. The process is awakened when the transition is completed,
866 * and an error returned to indicate that the vnode is no longer
867 * usable (possibly having been changed to a new file system type).
868 */
869 int
870 vget(vp, flags, p)
871 struct vnode *vp;
872 int flags;
873 struct proc *p;
874 {
875 int error = 0;
876
877 retry:
878
879 /*
880 * If the vnode is in the process of being cleaned out for
881 * another use, we wait for the cleaning to finish and then
882 * return failure. Cleaning is determined by checking that
883 * the VXLOCK flag is set.
884 */
885 if ((flags & LK_INTERLOCK) == 0)
886 simple_lock(&vp->v_interlock);
887 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
888 vp->v_flag |= VXWANT;
889 simple_unlock(&vp->v_interlock);
890 (void)tsleep((caddr_t)vp, PINOD, "vget", 0);
891 return (ENOENT);
892 }
893
894 /*
895 * vnode is being terminated.
896 * wait for vnode_pager_no_senders() to clear VTERMINATE
897 */
898 if (ISSET(vp->v_flag, VTERMINATE)) {
899 SET(vp->v_flag, VTERMWANT);
900 simple_unlock(&vp->v_interlock);
901 (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
902 return (ENOENT);
903 }
904
905 /*
906 * if the vnode is being initialized,
907 * wait for it to finish initialization
908 */
909 if (ISSET(vp->v_flag, VUINIT)) {
910 if (ISSET(vp->v_flag, VUINIT)) {
911 SET(vp->v_flag, VUWANT);
912 simple_unlock(&vp->v_interlock);
913 (void) tsleep((caddr_t)vp, PINOD, "vget2", 0);
914 goto retry;
915 }
916 }
917
918 simple_lock(&vnode_free_list_slock);
919 if (vp->v_usecount == 0) {
920 /* If on the free list, remove it from there */
921 if (VONLIST(vp))
922 VREMFREE("vget", vp);
923 } else {
924 /* If on the inactive list, remove it from there */
925 if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
926 if (VONLIST(vp))
927 VREMINACTIVE("vget", vp);
928 }
929 }
930
931 /* The vnode should not be on the inactive list here */
932 VINACTIVECHECK("vget", vp, 0);
933
934 simple_unlock(&vnode_free_list_slock);
935
936 if (++vp->v_usecount <= 0)
937 panic("vget: v_usecount");
938
939 /*
940 * Recover named reference as needed
941 */
942 if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) {
943 simple_unlock(&vp->v_interlock);
944 if (ubc_getobject(vp, UBC_HOLDOBJECT)) {
945 error = ENOENT;
946 goto errout;
947 }
948 simple_lock(&vp->v_interlock);
949 }
950
951 if (flags & LK_TYPE_MASK) {
952 if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
953 goto errout;
954 return (0);
955 }
956
957 if ((flags & LK_INTERLOCK) == 0)
958 simple_unlock(&vp->v_interlock);
959 return (0);
960
961 errout:
962 /*
963 * If the vnode was not active in the first place
964 	 * we must not call vrele() as VOP_INACTIVE() is not
965 	 * required.
966 	 * So the relevant part of vrele() is inlined here.
967 */
968 simple_lock(&vp->v_interlock);
969 if (--vp->v_usecount == 1) {
970 if (UBCINFOEXISTS(vp)) {
971 vinactive(vp);
972 simple_unlock(&vp->v_interlock);
973 return (error);
974 }
975 }
976 if (vp->v_usecount > 0) {
977 simple_unlock(&vp->v_interlock);
978 return (error);
979 }
980 if (vp->v_usecount < 0)
981 panic("vget: negative usecount (%d)", vp->v_usecount);
982 vfree(vp);
983 simple_unlock(&vp->v_interlock);
984 return (error);
985 }
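/*
 * Illustrative vget()/vput() pairing (a sketch): code that finds a vnode
 * in a hash chain and wants it referenced and locked does roughly
 *
 *	if (vget(vp, LK_EXCLUSIVE, p))
 *		goto loop;		// being recycled; look it up again
 *	// ... use the referenced, locked vnode ...
 *	vput(vp);			// unlock and drop the reference
 */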
986
987 /*
988 * Get a pager reference on the particular vnode.
989 *
990  * This is called from ubc_info_init() and it is assumed that
991  * the vnode is neither on the free list nor on the inactive list.
992 * It is also assumed that the vnode is neither being recycled
993 * by vgonel nor being terminated by vnode_pager_vrele().
994 *
995 * The vnode interlock is NOT held by the caller.
996 */
997 __private_extern__ int
998 vnode_pager_vget(vp)
999 struct vnode *vp;
1000 {
1001 simple_lock(&vp->v_interlock);
1002 if (UBCINFOMISSING(vp))
1003 panic("vnode_pager_vget: stolen ubc_info");
1004
1005 if (!UBCINFOEXISTS(vp))
1006 panic("vnode_pager_vget: lost ubc_info");
1007
1008 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM))
1009 panic("vnode_pager_vget: already being reclaimd");
1010
1011 if (ISSET(vp->v_flag, VTERMINATE))
1012 panic("vnode_pager_vget: already being terminated");
1013
1014 simple_lock(&vnode_free_list_slock);
1015 /* The vnode should not be on ANY list */
1016 if (VONLIST(vp))
1017 panic("vnode_pager_vget: still on the list");
1018
1019 /* The vnode should not be on the inactive list here */
1020 VINACTIVECHECK("vnode_pager_vget", vp, 0);
1021 simple_unlock(&vnode_free_list_slock);
1022
1023 /* After all those checks, now do the real work :-) */
1024 if (++vp->v_usecount <= 0)
1025 panic("vnode_pager_vget: v_usecount");
1026 simple_unlock(&vp->v_interlock);
1027
1028 return (0);
1029 }
1030
1031 /*
1032 * Stubs to use when there is no locking to be done on the underlying object.
1033 * A minimal shared lock is necessary to ensure that the underlying object
1034 * is not revoked while an operation is in progress. So, an active shared
1035  * count is maintained in an auxiliary vnode lock structure.
1036 */
1037 int
1038 vop_nolock(ap)
1039 struct vop_lock_args /* {
1040 struct vnode *a_vp;
1041 int a_flags;
1042 struct proc *a_p;
1043 } */ *ap;
1044 {
1045 #ifdef notyet
1046 /*
1047 * This code cannot be used until all the non-locking filesystems
1048 * (notably NFS) are converted to properly lock and release nodes.
1049 * Also, certain vnode operations change the locking state within
1050 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
1051 * and symlink). Ideally these operations should not change the
1052 * lock state, but should be changed to let the caller of the
1053 * function unlock them. Otherwise all intermediate vnode layers
1054 * (such as union, umapfs, etc) must catch these functions to do
1055 * the necessary locking at their layer. Note that the inactive
1056 * and lookup operations also change their lock state, but this
1057 * cannot be avoided, so these two operations will always need
1058 * to be handled in intermediate layers.
1059 */
1060 struct vnode *vp = ap->a_vp;
1061 int vnflags, flags = ap->a_flags;
1062
1063 if (vp->v_vnlock == NULL) {
1064 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1065 return (0);
1066 MALLOC_ZONE(vp->v_vnlock, struct lock__bsd__ *,
1067 sizeof(struct lock__bsd__), M_VNODE, M_WAITOK);
1068 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1069 }
1070 switch (flags & LK_TYPE_MASK) {
1071 case LK_DRAIN:
1072 vnflags = LK_DRAIN;
1073 break;
1074 case LK_EXCLUSIVE:
1075 case LK_SHARED:
1076 vnflags = LK_SHARED;
1077 break;
1078 case LK_UPGRADE:
1079 case LK_EXCLUPGRADE:
1080 case LK_DOWNGRADE:
1081 return (0);
1082 case LK_RELEASE:
1083 default:
1084 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
1085 }
1086 if (flags & LK_INTERLOCK)
1087 vnflags |= LK_INTERLOCK;
1088 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
1089 #else /* for now */
1090 /*
1091 * Since we are not using the lock manager, we must clear
1092 * the interlock here.
1093 */
1094 if (ap->a_flags & LK_INTERLOCK)
1095 simple_unlock(&ap->a_vp->v_interlock);
1096 return (0);
1097 #endif
1098 }
1099
1100 /*
1101 * Decrement the active use count.
1102 */
1103 int
1104 vop_nounlock(ap)
1105 struct vop_unlock_args /* {
1106 struct vnode *a_vp;
1107 int a_flags;
1108 struct proc *a_p;
1109 } */ *ap;
1110 {
1111 struct vnode *vp = ap->a_vp;
1112
1113 if (vp->v_vnlock == NULL)
1114 return (0);
1115 return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
1116 }
1117
1118 /*
1119  * Return whether or not the node is locked.
1120 */
1121 int
1122 vop_noislocked(ap)
1123 struct vop_islocked_args /* {
1124 struct vnode *a_vp;
1125 } */ *ap;
1126 {
1127 struct vnode *vp = ap->a_vp;
1128
1129 if (vp->v_vnlock == NULL)
1130 return (0);
1131 return (lockstatus(vp->v_vnlock));
1132 }
1133
1134 /*
1135 * Vnode reference.
1136 */
1137 void
1138 vref(vp)
1139 struct vnode *vp;
1140 {
1141
1142 simple_lock(&vp->v_interlock);
1143 if (vp->v_usecount <= 0)
1144 panic("vref used where vget required");
1145
1146 /* If on the inactive list, remove it from there */
1147 if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
1148 if (VONLIST(vp)) {
1149 simple_lock(&vnode_free_list_slock);
1150 VREMINACTIVE("vref", vp);
1151 simple_unlock(&vnode_free_list_slock);
1152 }
1153 }
1154 /* The vnode should not be on the inactive list here */
1155 VINACTIVECHECK("vref", vp, 0);
1156
1157 if (++vp->v_usecount <= 0)
1158 panic("vref v_usecount");
1159 simple_unlock(&vp->v_interlock);
1160 }
1161
1162 /*
1163 * put the vnode on appropriate free list.
1164 * called with v_interlock held.
1165 */
1166 static void
1167 vfree(vp)
1168 struct vnode *vp;
1169 {
1170 /*
1171 	 * if the vnode was not obtained by calling getnewvnode() we
1172 * are not responsible for the cleanup. Just return.
1173 */
1174 if (!(vp->v_flag & VSTANDARD)) {
1175 return;
1176 }
1177
1178 if (vp->v_usecount != 0)
1179 panic("vfree: v_usecount");
1180
1181 /* insert at tail of LRU list or at head if VAGE is set */
1182 simple_lock(&vnode_free_list_slock);
1183
1184 if (VONLIST(vp))
1185 panic("vfree: vnode still on list");
1186
1187 if (vp->v_flag & VAGE) {
1188 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1189 vp->v_flag &= ~VAGE;
1190 } else
1191 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1192 freevnodes++;
1193 simple_unlock(&vnode_free_list_slock);
1194 return;
1195 }
1196
1197 /*
1198 * put the vnode on the inactive list.
1199 * called with v_interlock held
1200 */
1201 static void
1202 vinactive(vp)
1203 struct vnode *vp;
1204 {
1205 if (!UBCINFOEXISTS(vp))
1206 panic("vinactive: not a UBC vnode");
1207
1208 if (vp->v_usecount != 1)
1209 panic("vinactive: v_usecount");
1210
1211 simple_lock(&vnode_free_list_slock);
1212
1213 if (VONLIST(vp))
1214 panic("vinactive: vnode still on list");
1215 VINACTIVECHECK("vinactive", vp, 0);
1216
1217 TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
1218 SET(vp->v_flag, VUINACTIVE);
1219 CLR(vp->v_flag, (VNOCACHE_DATA | VRAOFF));
1220
1221 inactivevnodes++;
1222 simple_unlock(&vnode_free_list_slock);
1223 return;
1224 }
1225
1226
1227 /*
1228 * vput(), just unlock and vrele()
1229 */
1230 void
1231 vput(vp)
1232 struct vnode *vp;
1233 {
1234 struct proc *p = current_proc(); /* XXX */
1235
1236 simple_lock(&vp->v_interlock);
1237 if (--vp->v_usecount == 1) {
1238 if (UBCINFOEXISTS(vp)) {
1239 vinactive(vp);
1240 simple_unlock(&vp->v_interlock);
1241 VOP_UNLOCK(vp, 0, p);
1242 return;
1243 }
1244 }
1245 if (vp->v_usecount > 0) {
1246 simple_unlock(&vp->v_interlock);
1247 VOP_UNLOCK(vp, 0, p);
1248 return;
1249 }
1250 #if DIAGNOSTIC
1251 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1252 vprint("vput: bad ref count", vp);
1253 panic("vput: v_usecount = %d, v_writecount = %d",
1254 vp->v_usecount, vp->v_writecount);
1255 }
1256 #endif
1257 if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1258 VREMINACTIVE("vrele", vp);
1259
1260 simple_unlock(&vp->v_interlock);
1261 VOP_INACTIVE(vp, p);
1262 /*
1263 * The interlock is not held and
1264 	 * VOP_INACTIVE releases the vnode lock.
1265 	 * We could block and the vnode might get reactivated.
1266 	 * Cannot just call vfree without checking the state.
1267 */
1268 simple_lock(&vp->v_interlock);
1269 if (!VONLIST(vp)) {
1270 if (vp->v_usecount == 0)
1271 vfree(vp);
1272 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1273 vinactive(vp);
1274 }
1275 simple_unlock(&vp->v_interlock);
1276 }
1277
1278 /*
1279 * Vnode release.
1280 * If count drops to zero, call inactive routine and return to freelist.
1281 */
1282 void
1283 vrele(vp)
1284 struct vnode *vp;
1285 {
1286 struct proc *p = current_proc(); /* XXX */
1287
1288 simple_lock(&vp->v_interlock);
1289 if (--vp->v_usecount == 1) {
1290 if (UBCINFOEXISTS(vp)) {
1291 vinactive(vp);
1292 simple_unlock(&vp->v_interlock);
1293 return;
1294 }
1295 }
1296 if (vp->v_usecount > 0) {
1297 simple_unlock(&vp->v_interlock);
1298 return;
1299 }
1300 #if DIAGNOSTIC
1301 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1302 vprint("vrele: bad ref count", vp);
1303 panic("vrele: ref cnt");
1304 }
1305 #endif
1306 if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1307 VREMINACTIVE("vrele", vp);
1308
1309
1310 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
1311 /* vnode is being cleaned, just return */
1312 vfree(vp);
1313 simple_unlock(&vp->v_interlock);
1314 return;
1315 }
1316
1317 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1318 VOP_INACTIVE(vp, p);
1319 /*
1320 * vn_lock releases the interlock and
1321 		 * VOP_INACTIVE releases the vnode lock.
1322 		 * We could block and the vnode might get reactivated.
1323 		 * Cannot just call vfree without checking the state.
1324 */
1325 simple_lock(&vp->v_interlock);
1326 if (!VONLIST(vp)) {
1327 if (vp->v_usecount == 0)
1328 vfree(vp);
1329 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1330 vinactive(vp);
1331 }
1332 simple_unlock(&vp->v_interlock);
1333 }
1334 #if 0
1335 else {
1336 vfree(vp);
1337 simple_unlock(&vp->v_interlock);
1338 kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp);
1339 }
1340 #endif
1341 }
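/*
 * Division of labor between the two releases above: vput() is for callers
 * that still hold the vnode lock (it unlocks and then drops the reference),
 * while vrele() is for callers holding only a reference.  Illustratively:
 *
 *	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 *	// ... operate on the locked vnode ...
 *	vput(vp);		// rather than VOP_UNLOCK(vp, 0, p); vrele(vp);
 */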
1342
1343 void
1344 vagevp(vp)
1345 struct vnode *vp;
1346 {
1347 simple_lock(&vp->v_interlock);
1348 vp->v_flag |= VAGE;
1349 simple_unlock(&vp->v_interlock);
1350 return;
1351 }
1352
1353 /*
1354 * Page or buffer structure gets a reference.
1355 */
1356 void
1357 vhold(vp)
1358 register struct vnode *vp;
1359 {
1360
1361 simple_lock(&vp->v_interlock);
1362 vp->v_holdcnt++;
1363 simple_unlock(&vp->v_interlock);
1364 }
1365
1366 /*
1367 * Page or buffer structure frees a reference.
1368 */
1369 void
1370 holdrele(vp)
1371 register struct vnode *vp;
1372 {
1373
1374 simple_lock(&vp->v_interlock);
1375 if (vp->v_holdcnt <= 0)
1376 panic("holdrele: holdcnt");
1377 vp->v_holdcnt--;
1378 simple_unlock(&vp->v_interlock);
1379 }
1380
1381 /*
1382 * Remove any vnodes in the vnode table belonging to mount point mp.
1383 *
1384 * If MNT_NOFORCE is specified, there should not be any active ones,
1385 * return error if any are found (nb: this is a user error, not a
1386 * system error). If MNT_FORCE is specified, detach any active vnodes
1387 * that are found.
1388 */
1389 #if DIAGNOSTIC
1390 int busyprt = 0; /* print out busy vnodes */
1391 #if 0
1392 struct ctldebug debug1 = { "busyprt", &busyprt };
1393 #endif /* 0 */
1394 #endif
1395
1396 int
1397 vflush(mp, skipvp, flags)
1398 struct mount *mp;
1399 struct vnode *skipvp;
1400 int flags;
1401 {
1402 struct proc *p = current_proc();
1403 struct vnode *vp, *nvp;
1404 int busy = 0;
1405
1406 simple_lock(&mntvnode_slock);
1407 loop:
1408 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1409 if (vp->v_mount != mp)
1410 goto loop;
1411 nvp = vp->v_mntvnodes.le_next;
1412 /*
1413 * Skip over a selected vnode.
1414 */
1415 if (vp == skipvp)
1416 continue;
1417
1418 simple_lock(&vp->v_interlock);
1419 /*
1420 		 * Skip over vnodes marked VSYSTEM or VNOFLUSH.
1421 */
1422 if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) {
1423 simple_unlock(&vp->v_interlock);
1424 continue;
1425 }
1426 /*
1427 		 * Skip over vnodes marked VSWAP.
1428 */
1429 if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
1430 simple_unlock(&vp->v_interlock);
1431 continue;
1432 }
1433 /*
1434 * If WRITECLOSE is set, only flush out regular file
1435 * vnodes open for writing.
1436 */
1437 if ((flags & WRITECLOSE) &&
1438 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1439 simple_unlock(&vp->v_interlock);
1440 continue;
1441 }
1442 /*
1443 * With v_usecount == 0, all we need to do is clear
1444 * out the vnode data structures and we are done.
1445 */
1446 if (vp->v_usecount == 0) {
1447 simple_unlock(&mntvnode_slock);
1448 vgonel(vp, p);
1449 simple_lock(&mntvnode_slock);
1450 continue;
1451 }
1452 /*
1453 * If FORCECLOSE is set, forcibly close the vnode.
1454 * For block or character devices, revert to an
1455 * anonymous device. For all other files, just kill them.
1456 */
1457 if (flags & FORCECLOSE) {
1458 simple_unlock(&mntvnode_slock);
1459 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1460 vgonel(vp, p);
1461 } else {
1462 vclean(vp, 0, p);
1463 vp->v_op = spec_vnodeop_p;
1464 insmntque(vp, (struct mount *)0);
1465 }
1466 simple_lock(&mntvnode_slock);
1467 continue;
1468 }
1469 #if DIAGNOSTIC
1470 if (busyprt)
1471 vprint("vflush: busy vnode", vp);
1472 #endif
1473 simple_unlock(&vp->v_interlock);
1474 busy++;
1475 }
1476 simple_unlock(&mntvnode_slock);
1477 if (busy && ((flags & FORCECLOSE)==0))
1478 return (EBUSY);
1479 return (0);
1480 }
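/*
 * Illustrative caller (a sketch): unmount code flushes every vnode on the
 * mount before tearing it down, e.g.
 *
 *	if (error = vflush(mp, NULLVP, forced ? FORCECLOSE : 0))
 *		return (error);
 *
 * With FORCECLOSE the EBUSY return above is suppressed and active vnodes
 * are forcibly disassociated instead.
 */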
1481
1482 /*
1483 * Disassociate the underlying file system from a vnode.
1484 * The vnode interlock is held on entry.
1485 */
1486 static void
1487 vclean(vp, flags, p)
1488 struct vnode *vp;
1489 int flags;
1490 struct proc *p;
1491 {
1492 int active;
1493 int removed = 0;
1494 int didhold;
1495
1496 /*
1497 	 * if the vnode was not obtained by calling getnewvnode() we
1498 * are not responsible for the cleanup. Just return.
1499 */
1500 if (!(vp->v_flag & VSTANDARD)) {
1501 simple_unlock(&vp->v_interlock);
1502 return;
1503 }
1504
1505 /*
1506 * Check to see if the vnode is in use.
1507 * If so we have to reference it before we clean it out
1508 * so that its count cannot fall to zero and generate a
1509 * race against ourselves to recycle it.
1510 */
1511 if (active = vp->v_usecount)
1512 if (++vp->v_usecount <= 0)
1513 panic("vclean: v_usecount");
1514 /*
1515 * Prevent the vnode from being recycled or
1516 * brought into use while we clean it out.
1517 */
1518 if (vp->v_flag & VXLOCK)
1519 panic("vclean: deadlock");
1520 vp->v_flag |= VXLOCK;
1521
1522 /*
1523 * Even if the count is zero, the VOP_INACTIVE routine may still
1524 * have the object locked while it cleans it out. The VOP_LOCK
1525 * ensures that the VOP_INACTIVE routine is done with its work.
1526 * For active vnodes, it ensures that no other activity can
1527 * occur while the underlying object is being cleaned out.
1528 */
1529 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1530
1531 /*
1532 * if this vnode is on the inactive list
1533 * take it off the list.
1534 */
1535 if ((active == 1) &&
1536 (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))) {
1537 simple_lock(&vnode_free_list_slock);
1538 VREMINACTIVE("vclean", vp);
1539 simple_unlock(&vnode_free_list_slock);
1540 removed++;
1541 }
1542
1543 	/* Close the file if this vnode is still in use. */
1544 if (active && (flags & DOCLOSE))
1545 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1546
1547 /* Clean the pages in VM. */
1548 didhold = ubc_hold(vp);
1549 if ((active) && (didhold))
1550 (void)ubc_clean(vp, 0); /* do not invalidate */
1551
1552 /*
1553 * Clean out any buffers associated with the vnode.
1554 */
1555 if (flags & DOCLOSE) {
1556 if (vp->v_tag == VT_NFS)
1557 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
1558 else
1559 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1560 }
1561
1562 if (active)
1563 VOP_INACTIVE(vp, p);
1564 else
1565 VOP_UNLOCK(vp, 0, p);
1566
1567 /* Destroy ubc named reference */
1568 if (didhold) {
1569 ubc_rele(vp);
1570 ubc_destroy_named(vp);
1571 }
1572
1573 /*
1574 * Reclaim the vnode.
1575 */
1576 if (VOP_RECLAIM(vp, p))
1577 panic("vclean: cannot reclaim");
1578 cache_purge(vp);
1579 if (vp->v_vnlock) {
1580 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1581 vprint("vclean: lock not drained", vp);
1582 FREE_ZONE(vp->v_vnlock, sizeof (struct lock__bsd__), M_VNODE);
1583 vp->v_vnlock = NULL;
1584 }
1585
1586 /* It's dead, Jim! */
1587 vp->v_op = dead_vnodeop_p;
1588 vp->v_tag = VT_NON;
1589
1590 /*
1591 * Done with purge, notify sleepers of the grim news.
1592 */
1593 vp->v_flag &= ~VXLOCK;
1594 if (vp->v_flag & VXWANT) {
1595 vp->v_flag &= ~VXWANT;
1596 wakeup((caddr_t)vp);
1597 }
1598
1599 if (active)
1600 vrele(vp);
1601 }
1602
1603 /*
1604 * Eliminate all activity associated with the requested vnode
1605 * and with all vnodes aliased to the requested vnode.
1606 */
1607 int
1608 vop_revoke(ap)
1609 struct vop_revoke_args /* {
1610 struct vnode *a_vp;
1611 int a_flags;
1612 } */ *ap;
1613 {
1614 struct vnode *vp, *vq;
1615 struct proc *p = current_proc();
1616
1617 #if DIAGNOSTIC
1618 if ((ap->a_flags & REVOKEALL) == 0)
1619 panic("vop_revoke");
1620 #endif
1621
1622 vp = ap->a_vp;
1623 simple_lock(&vp->v_interlock);
1624
1625 if (vp->v_flag & VALIASED) {
1626 /*
1627 * If a vgone (or vclean) is already in progress,
1628 * wait until it is done and return.
1629 */
1630 if (vp->v_flag & VXLOCK) {
1631 while (vp->v_flag & VXLOCK) {
1632 vp->v_flag |= VXWANT;
1633 simple_unlock(&vp->v_interlock);
1634 (void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1635 }
1636 return (0);
1637 }
1638 /*
1639 * Ensure that vp will not be vgone'd while we
1640 * are eliminating its aliases.
1641 */
1642 vp->v_flag |= VXLOCK;
1643 simple_unlock(&vp->v_interlock);
1644 while (vp->v_flag & VALIASED) {
1645 simple_lock(&spechash_slock);
1646 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1647 if (vq->v_rdev != vp->v_rdev ||
1648 vq->v_type != vp->v_type || vp == vq)
1649 continue;
1650 simple_unlock(&spechash_slock);
1651 vgone(vq);
1652 break;
1653 }
1654 if (vq == NULLVP)
1655 simple_unlock(&spechash_slock);
1656 }
1657 /*
1658 * Remove the lock so that vgone below will
1659 * really eliminate the vnode after which time
1660 * vgone will awaken any sleepers.
1661 */
1662 simple_lock(&vp->v_interlock);
1663 vp->v_flag &= ~VXLOCK;
1664 }
1665 vgonel(vp, p);
1666 return (0);
1667 }
1668
1669 /*
1670 * Recycle an unused vnode to the front of the free list.
1671 * Release the passed interlock if the vnode will be recycled.
1672 */
1673 int
1674 vrecycle(vp, inter_lkp, p)
1675 struct vnode *vp;
1676 struct slock *inter_lkp;
1677 struct proc *p;
1678 {
1679
1680 simple_lock(&vp->v_interlock);
1681 if (vp->v_usecount == 0) {
1682 if (inter_lkp)
1683 simple_unlock(inter_lkp);
1684 vgonel(vp, p);
1685 return (1);
1686 }
1687 simple_unlock(&vp->v_interlock);
1688 return (0);
1689 }
1690
1691 /*
1692 * Eliminate all activity associated with a vnode
1693 * in preparation for reuse.
1694 */
1695 void
1696 vgone(vp)
1697 struct vnode *vp;
1698 {
1699 struct proc *p = current_proc();
1700
1701 simple_lock(&vp->v_interlock);
1702 vgonel(vp, p);
1703 }
1704
1705 /*
1706 * vgone, with the vp interlock held.
1707 */
1708 void
1709 vgonel(vp, p)
1710 struct vnode *vp;
1711 struct proc *p;
1712 {
1713 struct vnode *vq;
1714 struct vnode *vx;
1715
1716 /*
1717 	 * if the vnode was not obtained by calling getnewvnode() we
1718 * are not responsible for the cleanup. Just return.
1719 */
1720 if (!(vp->v_flag & VSTANDARD)) {
1721 simple_unlock(&vp->v_interlock);
1722 return;
1723 }
1724
1725 /*
1726 * If a vgone (or vclean) is already in progress,
1727 * wait until it is done and return.
1728 */
1729 if (vp->v_flag & VXLOCK) {
1730 while (vp->v_flag & VXLOCK) {
1731 vp->v_flag |= VXWANT;
1732 simple_unlock(&vp->v_interlock);
1733 (void)tsleep((caddr_t)vp, PINOD, "vgone", 0);
1734 }
1735 return;
1736 }
1737 /*
1738 * Clean out the filesystem specific data.
1739 */
1740 vclean(vp, DOCLOSE, p);
1741 /*
1742 * Delete from old mount point vnode list, if on one.
1743 */
1744 if (vp->v_mount != NULL)
1745 insmntque(vp, (struct mount *)0);
1746 /*
1747 * If special device, remove it from special device alias list
1748 * if it is on one.
1749 */
1750 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1751 simple_lock(&spechash_slock);
1752 if (*vp->v_hashchain == vp) {
1753 *vp->v_hashchain = vp->v_specnext;
1754 } else {
1755 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1756 if (vq->v_specnext != vp)
1757 continue;
1758 vq->v_specnext = vp->v_specnext;
1759 break;
1760 }
1761 if (vq == NULL)
1762 panic("missing bdev");
1763 }
1764 if (vp->v_flag & VALIASED) {
1765 vx = NULL;
1766 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1767 if (vq->v_rdev != vp->v_rdev ||
1768 vq->v_type != vp->v_type)
1769 continue;
1770 if (vx)
1771 break;
1772 vx = vq;
1773 }
1774 if (vx == NULL)
1775 panic("missing alias");
1776 if (vq == NULL)
1777 vx->v_flag &= ~VALIASED;
1778 vp->v_flag &= ~VALIASED;
1779 }
1780 simple_unlock(&spechash_slock);
1781 FREE_ZONE(vp->v_specinfo, sizeof (struct specinfo), M_VNODE);
1782 vp->v_specinfo = NULL;
1783 }
1784 /*
1785 * If it is on the freelist and not already at the head,
1786 * move it to the head of the list. The test of the back
1787 * pointer and the reference count of zero is because
1788 * it will be removed from the free list by getnewvnode,
1789 * but will not have its reference count incremented until
1790 * after calling vgone. If the reference count were
1791 * incremented first, vgone would (incorrectly) try to
1792 * close the previous instance of the underlying object.
1793 * So, the back pointer is explicitly set to `0xdeadb' in
1794 * getnewvnode after removing it from the freelist to ensure
1795 * that we do not try to move it here.
1796 */
1797 if (vp->v_usecount == 0) {
1798 simple_lock(&vnode_free_list_slock);
1799 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1800 vnode_free_list.tqh_first != vp) {
1801 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1802 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1803 }
1804 simple_unlock(&vnode_free_list_slock);
1805 }
1806 vp->v_type = VBAD;
1807 }
1808
1809 /*
1810 * Lookup a vnode by device number.
1811 */
1812 int
1813 vfinddev(dev, type, vpp)
1814 dev_t dev;
1815 enum vtype type;
1816 struct vnode **vpp;
1817 {
1818 struct vnode *vp;
1819 int rc = 0;
1820
1821 simple_lock(&spechash_slock);
1822 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1823 if (dev != vp->v_rdev || type != vp->v_type)
1824 continue;
1825 *vpp = vp;
1826 rc = 1;
1827 break;
1828 }
1829 simple_unlock(&spechash_slock);
1830 return (rc);
1831 }
1832
1833 /*
1834 * Calculate the total number of references to a special device.
1835 */
1836 int
1837 vcount(vp)
1838 struct vnode *vp;
1839 {
1840 struct vnode *vq, *vnext;
1841 int count;
1842
1843 loop:
1844 if ((vp->v_flag & VALIASED) == 0)
1845 return (vp->v_usecount);
1846 simple_lock(&spechash_slock);
1847 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1848 vnext = vq->v_specnext;
1849 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1850 continue;
1851 /*
1852 * Alias, but not in use, so flush it out.
1853 */
1854 if (vq->v_usecount == 0 && vq != vp) {
1855 simple_unlock(&spechash_slock);
1856 vgone(vq);
1857 goto loop;
1858 }
1859 count += vq->v_usecount;
1860 }
1861 simple_unlock(&spechash_slock);
1862 return (count);
1863 }
1864
1865 int prtactive = 0; /* 1 => print out reclaim of active vnodes */
1866
1867 /*
1868 * Print out a description of a vnode.
1869 */
1870 static char *typename[] =
1871 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1872
1873 void
1874 vprint(label, vp)
1875 char *label;
1876 register struct vnode *vp;
1877 {
1878 char buf[64];
1879
1880 if (label != NULL)
1881 printf("%s: ", label);
1882 printf("type %s, usecount %d, writecount %d, refcount %d,",
1883 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1884 vp->v_holdcnt);
1885 buf[0] = '\0';
1886 if (vp->v_flag & VROOT)
1887 strcat(buf, "|VROOT");
1888 if (vp->v_flag & VTEXT)
1889 strcat(buf, "|VTEXT");
1890 if (vp->v_flag & VSYSTEM)
1891 strcat(buf, "|VSYSTEM");
1892 if (vp->v_flag & VNOFLUSH)
1893 strcat(buf, "|VNOFLUSH");
1894 if (vp->v_flag & VXLOCK)
1895 strcat(buf, "|VXLOCK");
1896 if (vp->v_flag & VXWANT)
1897 strcat(buf, "|VXWANT");
1898 if (vp->v_flag & VBWAIT)
1899 strcat(buf, "|VBWAIT");
1900 if (vp->v_flag & VALIASED)
1901 strcat(buf, "|VALIASED");
1902 if (buf[0] != '\0')
1903 printf(" flags (%s)", &buf[1]);
1904 if (vp->v_data == NULL) {
1905 printf("\n");
1906 } else {
1907 printf("\n\t");
1908 VOP_PRINT(vp);
1909 }
1910 }
1911
1912 #ifdef DEBUG
1913 /*
1914 * List all of the locked vnodes in the system.
1915 * Called when debugging the kernel.
1916 */
1917 void
1918 printlockedvnodes()
1919 {
1920 struct proc *p = current_proc();
1921 struct mount *mp, *nmp;
1922 struct vnode *vp;
1923
1924 printf("Locked vnodes\n");
1925 simple_lock(&mountlist_slock);
1926 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1927 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1928 nmp = mp->mnt_list.cqe_next;
1929 continue;
1930 }
1931 for (vp = mp->mnt_vnodelist.lh_first;
1932 vp != NULL;
1933 vp = vp->v_mntvnodes.le_next) {
1934 if (VOP_ISLOCKED(vp))
1935 vprint((char *)0, vp);
1936 }
1937 simple_lock(&mountlist_slock);
1938 nmp = mp->mnt_list.cqe_next;
1939 vfs_unbusy(mp, p);
1940 }
1941 simple_unlock(&mountlist_slock);
1942 }
1943 #endif
1944
1945 /*
1946 * Top level filesystem related information gathering.
1947 */
1948 int
1949 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1950 int *name;
1951 u_int namelen;
1952 void *oldp;
1953 size_t *oldlenp;
1954 void *newp;
1955 size_t newlen;
1956 struct proc *p;
1957 {
1958 struct vfsconf *vfsp;
1959
1960 /*
1961 	 * VFS_NUMMNTOPS shouldn't be at name[0] since it
1962 	 * is a VFS-generic variable. So now we must check
1963 	 * namelen so we don't end up colliding with any UFS
1964 	 * variables (since UFS vfc_typenum is 1).
1965 *
1966 * It should have been:
1967 * name[0]: VFS_GENERIC
1968 * name[1]: VFS_NUMMNTOPS
1969 */
1970 if (namelen == 1 && name[0] == VFS_NUMMNTOPS) {
1971 extern unsigned int vfs_nummntops;
1972 return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
1973 }
1974
1975 /* all sysctl names at this level are at least name and field */
1976 if (namelen < 2)
1977 return (ENOTDIR); /* overloaded */
1978 if (name[0] != VFS_GENERIC) {
1979 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1980 if (vfsp->vfc_typenum == name[0])
1981 break;
1982 if (vfsp == NULL)
1983 return (EOPNOTSUPP);
1984 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1985 oldp, oldlenp, newp, newlen, p));
1986 }
1987 switch (name[1]) {
1988 case VFS_MAXTYPENUM:
1989 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
1990 case VFS_CONF:
1991 if (namelen < 3)
1992 return (ENOTDIR); /* overloaded */
1993 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1994 if (vfsp->vfc_typenum == name[2])
1995 break;
1996 if (vfsp == NULL)
1997 return (EOPNOTSUPP);
1998 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
1999 sizeof(struct vfsconf)));
2000 }
2001 return (EOPNOTSUPP);
2002 }
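/*
 * Illustrative name vectors handled above (a sketch):
 *
 *	{ <fs vfc_typenum>, <fs selector>, ... }	forwarded to that
 *							filesystem's vfs_sysctl
 *	{ VFS_GENERIC, VFS_MAXTYPENUM }			highest fs type number
 *	{ VFS_GENERIC, VFS_CONF, <fs vfc_typenum> }	that fs's vfsconf record
 */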
2003
2004 int kinfo_vdebug = 1;
2005 #define KINFO_VNODESLOP 10
2006 /*
2007 * Dump vnode list (via sysctl).
2008 * Copyout address of vnode followed by vnode.
2009 */
2010 /* ARGSUSED */
2011 int
2012 sysctl_vnode(where, sizep, p)
2013 char *where;
2014 size_t *sizep;
2015 struct proc *p;
2016 {
2017 struct mount *mp, *nmp;
2018 struct vnode *nvp, *vp;
2019 char *bp = where, *savebp;
2020 char *ewhere;
2021 int error;
2022
2023 #define VPTRSZ sizeof (struct vnode *)
2024 #define VNODESZ sizeof (struct vnode)
2025 if (where == NULL) {
2026 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2027 return (0);
2028 }
2029 ewhere = where + *sizep;
2030
2031 simple_lock(&mountlist_slock);
2032 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2033 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2034 nmp = mp->mnt_list.cqe_next;
2035 continue;
2036 }
2037 savebp = bp;
2038 again:
2039 simple_lock(&mntvnode_slock);
2040 for (vp = mp->mnt_vnodelist.lh_first;
2041 vp != NULL;
2042 vp = nvp) {
2043 /*
2044 * Check that the vp is still associated with
2045 * this filesystem. RACE: could have been
2046 * recycled onto the same filesystem.
2047 */
2048 if (vp->v_mount != mp) {
2049 simple_unlock(&mntvnode_slock);
2050 if (kinfo_vdebug)
2051 printf("kinfo: vp changed\n");
2052 bp = savebp;
2053 goto again;
2054 }
2055 nvp = vp->v_mntvnodes.le_next;
2056 if (bp + VPTRSZ + VNODESZ > ewhere) {
2057 simple_unlock(&mntvnode_slock);
2058 *sizep = bp - where;
2059 return (ENOMEM);
2060 }
2061 simple_unlock(&mntvnode_slock);
2062 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2063 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2064 return (error);
2065 bp += VPTRSZ + VNODESZ;
2066 simple_lock(&mntvnode_slock);
2067 }
2068 simple_unlock(&mntvnode_slock);
2069 simple_lock(&mountlist_slock);
2070 nmp = mp->mnt_list.cqe_next;
2071 vfs_unbusy(mp, p);
2072 }
2073 simple_unlock(&mountlist_slock);
2074
2075 *sizep = bp - where;
2076 return (0);
2077 }
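
/*
 * The buffer produced above is a packed sequence of (vnode pointer, vnode)
 * pairs.  A minimal sketch of walking it on the consuming side, assuming
 * "buf" and "len" came back from the sysctl; the variable names are
 * illustrative only:
 *
 *	char *cp, *end;
 *	size_t stride = sizeof(struct vnode *) + sizeof(struct vnode);
 *
 *	end = buf + len;
 *	for (cp = buf; cp + stride <= end; cp += stride) {
 *		struct vnode *kaddr = *(struct vnode **)cp;
 *		struct vnode *vcopy = (struct vnode *)(cp + sizeof(struct vnode *));
 *		... kaddr is the kernel address, vcopy the copied contents ...
 *	}
 */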
2078
2079 /*
2080 * Check to see if a filesystem is mounted on a block device.
2081 */
2082 int
2083 vfs_mountedon(vp)
2084 struct vnode *vp;
2085 {
2086 struct vnode *vq;
2087 int error = 0;
2088
2089 if (vp->v_specflags & SI_MOUNTEDON)
2090 return (EBUSY);
2091 if (vp->v_flag & VALIASED) {
2092 simple_lock(&spechash_slock);
2093 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2094 if (vq->v_rdev != vp->v_rdev ||
2095 vq->v_type != vp->v_type)
2096 continue;
2097 if (vq->v_specflags & SI_MOUNTEDON) {
2098 error = EBUSY;
2099 break;
2100 }
2101 }
2102 simple_unlock(&spechash_slock);
2103 }
2104 return (error);
2105 }
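
/*
 * A sketch of the typical caller pattern (not a quote of any particular
 * filesystem's mount code): before mounting a filesystem on a block device,
 * the device vnode is checked so the same device cannot be mounted twice.
 *
 *	if ((error = vfs_mountedon(devvp)))
 *		return (error);		EBUSY if already mounted on
 */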
2106
2107 /*
2108 * Unmount all filesystems. The list is traversed in reverse order
2109 * of mounting to avoid dependencies.
2110 */
2111 __private_extern__ void
2112 vfs_unmountall()
2113 {
2114 struct mount *mp, *nmp;
2115 struct proc *p = current_proc();
2116
2117 /*
2118 * Since this only runs when rebooting, it is not interlocked.
2119 */
2120 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2121 nmp = mp->mnt_list.cqe_prev;
2122 (void) dounmount(mp, MNT_FORCE, p);
2123 }
2124 }
2125
2126 /*
2127 * Build hash lists of net addresses and hang them off the mount point.
2128 * Called by vfs_export() to set up the lists of export addresses.
2129 */
2130 static int
2131 vfs_hang_addrlist(mp, nep, argp)
2132 struct mount *mp;
2133 struct netexport *nep;
2134 struct export_args *argp;
2135 {
2136 register struct netcred *np;
2137 register struct radix_node_head *rnh;
2138 register int i;
2139 struct radix_node *rn;
2140 struct sockaddr *saddr, *smask = 0;
2141 struct domain *dom;
2142 int error;
2143
2144 if (argp->ex_addrlen == 0) {
2145 if (mp->mnt_flag & MNT_DEFEXPORTED)
2146 return (EPERM);
2147 np = &nep->ne_defexported;
2148 np->netc_exflags = argp->ex_flags;
2149 np->netc_anon = argp->ex_anon;
2150 np->netc_anon.cr_ref = 1;
2151 mp->mnt_flag |= MNT_DEFEXPORTED;
2152 return (0);
2153 }
2154 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2155 MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK);
2156 bzero((caddr_t)np, i);
2157 saddr = (struct sockaddr *)(np + 1);
2158 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
2159 goto out;
2160 if (saddr->sa_len > argp->ex_addrlen)
2161 saddr->sa_len = argp->ex_addrlen;
2162 if (argp->ex_masklen) {
2163 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2164 		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
2165 if (error)
2166 goto out;
2167 if (smask->sa_len > argp->ex_masklen)
2168 smask->sa_len = argp->ex_masklen;
2169 }
2170 i = saddr->sa_family;
2171 if ((rnh = nep->ne_rtable[i]) == 0) {
2172 /*
2173 		 * It seems silly to initialize every AF when most are not
2174 		 * used, so do it on demand here.
2175 */
2176 for (dom = domains; dom; dom = dom->dom_next)
2177 if (dom->dom_family == i && dom->dom_rtattach) {
2178 dom->dom_rtattach((void **)&nep->ne_rtable[i],
2179 dom->dom_rtoffset);
2180 break;
2181 }
2182 if ((rnh = nep->ne_rtable[i]) == 0) {
2183 error = ENOBUFS;
2184 goto out;
2185 }
2186 }
2187 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
2188 np->netc_rnodes);
2189 if (rn == 0) {
2190 /*
2191 * One of the reasons that rnh_addaddr may fail is that
2192 * the entry already exists. To check for this case, we
2193 * look up the entry to see if it is there. If so, we
2194 * do not need to make a new entry but do return success.
2195 */
2196 _FREE(np, M_NETADDR);
2197 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
2198 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
2199 ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
2200 !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
2201 (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
2202 return (0);
2203 return (EPERM);
2204 }
2205 np->netc_exflags = argp->ex_flags;
2206 np->netc_anon = argp->ex_anon;
2207 np->netc_anon.cr_ref = 1;
2208 return (0);
2209 out:
2210 _FREE(np, M_NETADDR);
2211 return (error);
2212 }
2213
2214 /* ARGSUSED */
2215 static int
2216 vfs_free_netcred(rn, w)
2217 struct radix_node *rn;
2218 caddr_t w;
2219 {
2220 register struct radix_node_head *rnh = (struct radix_node_head *)w;
2221
2222 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2223 _FREE((caddr_t)rn, M_NETADDR);
2224 return (0);
2225 }
2226
2227 /*
2228 * Free the net address hash lists that are hanging off the mount points.
2229 */
2230 static void
2231 vfs_free_addrlist(nep)
2232 struct netexport *nep;
2233 {
2234 register int i;
2235 register struct radix_node_head *rnh;
2236
2237 for (i = 0; i <= AF_MAX; i++)
2238 if (rnh = nep->ne_rtable[i]) {
2239 (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
2240 (caddr_t)rnh);
2241 _FREE((caddr_t)rnh, M_RTABLE);
2242 nep->ne_rtable[i] = 0;
2243 }
2244 }
2245
2246 int
2247 vfs_export(mp, nep, argp)
2248 struct mount *mp;
2249 struct netexport *nep;
2250 struct export_args *argp;
2251 {
2252 int error;
2253
2254 if (argp->ex_flags & MNT_DELEXPORT) {
2255 vfs_free_addrlist(nep);
2256 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2257 }
2258 if (argp->ex_flags & MNT_EXPORTED) {
2259 if (error = vfs_hang_addrlist(mp, nep, argp))
2260 return (error);
2261 mp->mnt_flag |= MNT_EXPORTED;
2262 }
2263 return (0);
2264 }
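
/*
 * A minimal sketch of how a filesystem's export path might call vfs_export()
 * to publish a default export (the ex_addrlen == 0 case handled in
 * vfs_hang_addrlist() above).  The struct export_args fields come from
 * <sys/mount.h>; "ump->um_export" and "anoncred" are illustrative names only.
 *
 *	struct export_args ea;
 *
 *	bzero(&ea, sizeof(ea));
 *	ea.ex_flags = MNT_EXPORTED | MNT_EXRDONLY;
 *	ea.ex_anon = anoncred;			credentials for anonymous clients
 *	ea.ex_addrlen = 0;			no address: the default export
 *	error = vfs_export(mp, &ump->um_export, &ea);
 */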
2265
2266 struct netcred *
2267 vfs_export_lookup(mp, nep, nam)
2268 register struct mount *mp;
2269 struct netexport *nep;
2270 struct mbuf *nam;
2271 {
2272 register struct netcred *np;
2273 register struct radix_node_head *rnh;
2274 struct sockaddr *saddr;
2275
2276 np = NULL;
2277 if (mp->mnt_flag & MNT_EXPORTED) {
2278 /*
2279 * Lookup in the export list first.
2280 */
2281 if (nam != NULL) {
2282 saddr = mtod(nam, struct sockaddr *);
2283 rnh = nep->ne_rtable[saddr->sa_family];
2284 if (rnh != NULL) {
2285 np = (struct netcred *)
2286 (*rnh->rnh_matchaddr)((caddr_t)saddr,
2287 rnh);
2288 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2289 np = NULL;
2290 }
2291 }
2292 /*
2293 * If no address match, use the default if it exists.
2294 */
2295 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2296 np = &nep->ne_defexported;
2297 }
2298 return (np);
2299 }
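
/*
 * A sketch of the intended use from an NFS-style server (illustrative, not
 * lifted from the actual nfs code): given the mount point and the client's
 * address in an mbuf, a NULL return means the client has no export access.
 *
 *	np = vfs_export_lookup(mp, &ump->um_export, nam);
 *	if (np == NULL)
 *		return (EACCES);
 *	then honor np->netc_exflags and map credentials via np->netc_anon
 */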
2300
2301 /*
2302 * try to reclaim vnodes from the memory
2303 * object cache
2304 */
2305 static int
2306 vm_object_cache_reclaim(int count)
2307 {
2308 int cnt;
2309 void vnode_pager_release_from_cache(int *);
2310
2311 /* attempt to reclaim vnodes from VM object cache */
2312 cnt = count;
2313 vnode_pager_release_from_cache(&cnt);
2314 return(cnt);
2315 }
2316
2317 /*
2318 * Release memory object reference held by inactive vnodes
2319 * and then try to reclaim some vnodes from the memory
2320 * object cache
2321 */
2322 static int
2323 vnreclaim(int count)
2324 {
2325 int i, loopcnt;
2326 struct vnode *vp;
2327 int err;
2328 struct proc *p;
2329
2330 i = 0;
2331 loopcnt = 0;
2332
2333 /* Try to release "count" vnodes from the inactive list */
2334 restart:
2335 if (++loopcnt > inactivevnodes) {
2336 /*
2337 * I did my best trying to reclaim the vnodes.
2338 * Do not try any more as that would only lead to
2339 * long latencies. Also in the worst case
2340 * this can get totally CPU bound.
2341 		 * Just fall through and attempt a reclaim of the VM
2342 		 * object cache.
2343 */
2344 goto out;
2345 }
2346
2347 simple_lock(&vnode_free_list_slock);
2348 for (vp = TAILQ_FIRST(&vnode_inactive_list);
2349 (vp != NULLVP) && (i < count);
2350 vp = TAILQ_NEXT(vp, v_freelist)) {
2351
2352 if (!simple_lock_try(&vp->v_interlock))
2353 continue;
2354
2355 if (vp->v_usecount != 1)
2356 panic("vnreclaim: v_usecount");
2357
2358 if(!UBCINFOEXISTS(vp)) {
2359 if (vp->v_type == VBAD) {
2360 VREMINACTIVE("vnreclaim", vp);
2361 simple_unlock(&vp->v_interlock);
2362 continue;
2363 } else
2364 panic("non UBC vnode on inactive list");
2365 /* Should not reach here */
2366 }
2367
2368 /* If vnode is already being reclaimed, wait */
2369 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
2370 vp->v_flag |= VXWANT;
2371 simple_unlock(&vp->v_interlock);
2372 simple_unlock(&vnode_free_list_slock);
2373 (void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
2374 goto restart;
2375 }
2376
2377 VREMINACTIVE("vnreclaim", vp);
2378 simple_unlock(&vnode_free_list_slock);
2379
2380 if (ubc_issetflags(vp, UI_WASMAPPED)) {
2381 /*
2382 * We should not reclaim as it is likely
2383 * to be in use. Let it die a natural death.
2384 * Release the UBC reference if one exists
2385 * and put it back at the tail.
2386 */
2387 simple_unlock(&vp->v_interlock);
2388 if (ubc_release_named(vp)) {
2389 if (UBCINFOEXISTS(vp)) {
2390 simple_lock(&vp->v_interlock);
2391 if (vp->v_usecount == 1 && !VONLIST(vp))
2392 vinactive(vp);
2393 simple_unlock(&vp->v_interlock);
2394 }
2395 } else {
2396 simple_lock(&vp->v_interlock);
2397 vinactive(vp);
2398 simple_unlock(&vp->v_interlock);
2399 }
2400 } else {
2401 int didhold;
2402
2403 VORECLAIM_ENABLE(vp);
2404
2405 /*
2406 * scrub the dirty pages and invalidate the buffers
2407 */
2408 p = current_proc();
2409 err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
2410 if (err) {
2411 /* cannot reclaim */
2412 simple_lock(&vp->v_interlock);
2413 vinactive(vp);
2414 VORECLAIM_DISABLE(vp);
2415 i++;
2416 simple_unlock(&vp->v_interlock);
2417 goto restart;
2418 }
2419
2420 /* keep the vnode alive so we can kill it */
2421 simple_lock(&vp->v_interlock);
2422 if(vp->v_usecount != 1)
2423 panic("VOCR: usecount race");
2424 vp->v_usecount++;
2425 simple_unlock(&vp->v_interlock);
2426
2427 /* clean up the state in VM without invalidating */
2428 didhold = ubc_hold(vp);
2429 if (didhold)
2430 (void)ubc_clean(vp, 0);
2431
2432 /* flush and invalidate buffers associated with the vnode */
2433 if (vp->v_tag == VT_NFS)
2434 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
2435 else
2436 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
2437
2438 /*
2439 * Note: for the v_usecount == 2 case, VOP_INACTIVE
2440 * has not yet been called. Call it now while vp is
2441 			 * still locked; it will also release the lock.
2442 */
2443 if (vp->v_usecount == 2)
2444 VOP_INACTIVE(vp, p);
2445 else
2446 VOP_UNLOCK(vp, 0, p);
2447
2448 if (didhold)
2449 ubc_rele(vp);
2450
2451 /*
2452 * destroy the ubc named reference.
2453 * If we can't because it is held for I/Os
2454 * in progress, just put it back on the inactive
2455 * list and move on. Otherwise, the paging reference
2456 * is toast (and so is this vnode?).
2457 */
2458 if (ubc_destroy_named(vp)) {
2459 i++;
2460 }
2461 simple_lock(&vp->v_interlock);
2462 VORECLAIM_DISABLE(vp);
2463 simple_unlock(&vp->v_interlock);
2464 vrele(vp); /* release extra use we added here */
2465 }
2466 /* inactive list lock was released, must restart */
2467 goto restart;
2468 }
2469 simple_unlock(&vnode_free_list_slock);
2470
2471 vnode_reclaim_tried += i;
2472 out:
2473 i = vm_object_cache_reclaim(count);
2474 vnode_objects_reclaimed += i;
2475
2476 return(i);
2477 }
2478
2479 /*
2480 * This routine is called from vnode_pager_no_senders()
2481 * which in turn can be called with the vnode locked by vnode_uncache().
2482 * But it could also get called as a result of vm_object_cache_trim().
2483 * In that case lock state is unknown.
2484 * AGE the vnode so that it gets recycled quickly.
2485 * Check lock status to decide whether to call vput() or vrele().
2486 */
2487 __private_extern__ void
2488 vnode_pager_vrele(struct vnode *vp)
2489 {
2490
2491 boolean_t funnel_state;
2492 int isvnreclaim = 1;
2493
2494 if (vp == (struct vnode *) NULL)
2495 panic("vnode_pager_vrele: null vp");
2496
2497 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2498
2499 /* Mark the vnode to be recycled */
2500 vagevp(vp);
2501
2502 simple_lock(&vp->v_interlock);
2503 /*
2504 * If a vgone (or vclean) is already in progress,
2505 	 * do not bother with the ubc_info cleanup.
2506 * Let the vclean deal with it.
2507 */
2508 if (vp->v_flag & VXLOCK) {
2509 CLR(vp->v_flag, VTERMINATE);
2510 if (ISSET(vp->v_flag, VTERMWANT)) {
2511 CLR(vp->v_flag, VTERMWANT);
2512 wakeup((caddr_t)&vp->v_ubcinfo);
2513 }
2514 simple_unlock(&vp->v_interlock);
2515 vrele(vp);
2516 (void) thread_funnel_set(kernel_flock, funnel_state);
2517 return;
2518 }
2519
2520 /* It's dead, Jim! */
2521 if (!ISSET(vp->v_flag, VORECLAIM)) {
2522 /*
2523 * called as a result of eviction of the memory
2524 * object from the memory object cache
2525 */
2526 isvnreclaim = 0;
2527
2528 /* So serialize vnode operations */
2529 VORECLAIM_ENABLE(vp);
2530 }
2531 if (!ISSET(vp->v_flag, VTERMINATE))
2532 SET(vp->v_flag, VTERMINATE);
2533 if (UBCINFOEXISTS(vp)) {
2534 struct ubc_info *uip = vp->v_ubcinfo;
2535
2536 if (ubc_issetflags(vp, UI_WASMAPPED))
2537 SET(vp->v_flag, VWASMAPPED);
2538
2539 vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
2540 simple_unlock(&vp->v_interlock);
2541 ubc_info_deallocate(uip);
2542 } else {
2543 if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
2544 && ((vp)->v_ubcinfo != UBC_NOINFO)) {
2545 struct ubc_info *uip = vp->v_ubcinfo;
2546
2547 vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
2548 simple_unlock(&vp->v_interlock);
2549 ubc_info_deallocate(uip);
2550 } else {
2551 simple_unlock(&vp->v_interlock);
2552 }
2553 }
2554
2555 CLR(vp->v_flag, VTERMINATE);
2556
2557 if (vp->v_type != VBAD){
2558 vgone(vp); /* revoke the vnode */
2559 vrele(vp); /* and drop the reference */
2560 } else
2561 vrele(vp);
2562
2563 if (ISSET(vp->v_flag, VTERMWANT)) {
2564 CLR(vp->v_flag, VTERMWANT);
2565 wakeup((caddr_t)&vp->v_ubcinfo);
2566 }
2567 if (!isvnreclaim)
2568 VORECLAIM_DISABLE(vp);
2569 (void) thread_funnel_set(kernel_flock, funnel_state);
2570 return;
2571 }
2572
2573
2574 #if DIAGNOSTIC
2575 int walk_vnodes_debug=0;
2576
2577 void
2578 walk_allvnodes()
2579 {
2580 struct mount *mp, *nmp;
2581 struct vnode *vp;
2582 int cnt = 0;
2583
2584 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2585 for (vp = mp->mnt_vnodelist.lh_first;
2586 vp != NULL;
2587 vp = vp->v_mntvnodes.le_next) {
2588 if (vp->v_usecount < 0){
2589 if(walk_vnodes_debug) {
2590 printf("vp is %x\n",vp);
2591 }
2592 }
2593 }
2594 nmp = mp->mnt_list.cqe_next;
2595 }
2596 for (cnt = 0, vp = vnode_free_list.tqh_first;
2597 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
2598 		if (vp->v_usecount < 0) {
2599 			if (walk_vnodes_debug) {
2600 				printf("vp is %x\n", vp);
2601 			}
2602 		}
2603 }
2604 printf("%d - free\n", cnt);
2605
2606 for (cnt = 0, vp = vnode_inactive_list.tqh_first;
2607 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
2608 		if (vp->v_usecount < 0) {
2609 			if (walk_vnodes_debug) {
2610 				printf("vp is %x\n", vp);
2611 			}
2612 		}
2613 }
2614 printf("%d - inactive\n", cnt);
2615 }
2616 #endif /* DIAGNOSTIC */
2617
2618 void
2619 vfs_io_attributes(vp, flags, iosize, vectors)
2620 struct vnode *vp;
2621 int flags; /* B_READ or B_WRITE */
2622 int *iosize;
2623 int *vectors;
2624 {
2625 struct mount *mp;
2626
2627 /* start with "reasonable" defaults */
2628 *iosize = MAXPHYS;
2629 *vectors = 32;
2630
2631 mp = vp->v_mount;
2632 if (mp != NULL) {
2633 switch (flags) {
2634 case B_READ:
2635 *iosize = mp->mnt_maxreadcnt;
2636 *vectors = mp->mnt_segreadcnt;
2637 break;
2638 case B_WRITE:
2639 *iosize = mp->mnt_maxwritecnt;
2640 *vectors = mp->mnt_segwritecnt;
2641 break;
2642 default:
2643 break;
2644 }
2645 }
2646
2647 return;
2648 }
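
/*
 * A sketch of a caller sizing a transfer with the limits reported above:
 * never issue more than *iosize bytes or more than *vectors scatter/gather
 * segments in a single request.  The names below are illustrative; the
 * cluster I/O code and disk drivers are the real consumers.
 *
 *	int maxio, vectors;
 *
 *	vfs_io_attributes(vp, B_READ, &maxio, &vectors);
 *	xfersize = min(resid, maxio);
 */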
2649
2650 #include <dev/disk.h>
2651
2652 int
2653 vfs_init_io_attributes(devvp, mp)
2654 struct vnode *devvp;
2655 struct mount *mp;
2656 {
2657 int error;
2658 off_t readblockcnt;
2659 off_t writeblockcnt;
2660 off_t readsegcnt;
2661 off_t writesegcnt;
2662 u_long blksize;
2663
2664 u_int64_t temp;
2665
2666 struct proc *p = current_proc();
2667 struct ucred *cred = p->p_ucred;
2668
2669 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
2670 (caddr_t)&readblockcnt, 0, cred, p)))
2671 return (error);
2672
2673 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
2674 (caddr_t)&writeblockcnt, 0, cred, p)))
2675 return (error);
2676
2677 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
2678 (caddr_t)&readsegcnt, 0, cred, p)))
2679 return (error);
2680
2681 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
2682 (caddr_t)&writesegcnt, 0, cred, p)))
2683 return (error);
2684
2685 if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
2686 (caddr_t)&blksize, 0, cred, p)))
2687 return (error);
2688
2689 temp = readblockcnt * blksize;
2690 temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2691 mp->mnt_maxreadcnt = (u_int32_t)temp;
2692
2693 temp = writeblockcnt * blksize;
2694 temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2695 mp->mnt_maxwritecnt = (u_int32_t)temp;
2696
2697 temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
2698 mp->mnt_segreadcnt = (u_int16_t)temp;
2699
2700 temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
2701 mp->mnt_segwritecnt = (u_int16_t)temp;
2702
2703 #if 0
2704 printf("--- IO attributes for mount point 0x%08x ---\n", mp);
2705 printf("\tmnt_maxreadcnt = 0x%x", mp->mnt_maxreadcnt);
2706 printf("\tmnt_maxwritecnt = 0x%x\n", mp->mnt_maxwritecnt);
2707 printf("\tmnt_segreadcnt = 0x%x", mp->mnt_segreadcnt);
2708 printf("\tmnt_segwritecnt = 0x%x\n", mp->mnt_segwritecnt);
2709 #endif /* 0 */
2710
2711 return (error);
2712 }
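
/*
 * Worked example of the clamping above (hypothetical values): with
 * blksize = 512 and readblockcnt = 2048, temp = 2048 * 512 = 1048576,
 * which fits in 32 bits, so mnt_maxreadcnt becomes 1 MB.  If a driver
 * reported a block count large enough that temp overflowed 32 bits, the
 * limit would instead be rounded down to the largest multiple of blksize
 * that still fits in a u_int32_t.
 */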
2713