bsd/vfs/vfs_subr.c

   1 /*
   2  * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
   7  *
   8  * This file contains Original Code and/or Modifications of Original Code
   9  * as defined in and that are subject to the Apple Public Source License
  10  * Version 2.0 (the 'License'). You may not use this file except in
  11  * compliance with the License. Please obtain a copy of the License at
  12  * http://www.opensource.apple.com/apsl/ and read it before using this
  13  * file.
  14  *
  15  * The Original Code and all software distributed under the License are
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  20  * Please see the License for the specific language governing rights and
  21  * limitations under the License.
  22  *
  23  * @APPLE_LICENSE_HEADER_END@
  24  */
  25 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  26 /*
  27  * Copyright (c) 1989, 1993
  28  *      The Regents of the University of California.  All rights reserved.
  29  * (c) UNIX System Laboratories, Inc.
  30  * All or some portions of this file are derived from material licensed
  31  * to the University of California by American Telephone and Telegraph
  32  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  33  * the permission of UNIX System Laboratories, Inc.
  34  *
  35  * Redistribution and use in source and binary forms, with or without
  36  * modification, are permitted provided that the following conditions
  37  * are met:
  38  * 1. Redistributions of source code must retain the above copyright
  39  *    notice, this list of conditions and the following disclaimer.
  40  * 2. Redistributions in binary form must reproduce the above copyright
  41  *    notice, this list of conditions and the following disclaimer in the
  42  *    documentation and/or other materials provided with the distribution.
  43  * 3. All advertising materials mentioning features or use of this software
  44  *    must display the following acknowledgement:
  45  *      This product includes software developed by the University of
  46  *      California, Berkeley and its contributors.
  47  * 4. Neither the name of the University nor the names of its contributors
  48  *    may be used to endorse or promote products derived from this software
  49  *    without specific prior written permission.
  50  *
  51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  61  * SUCH DAMAGE.
  62  *
  63  *      @(#)vfs_subr.c  8.31 (Berkeley) 5/26/95
  64  */
  65
  66 /*
  67  * External virtual filesystem routines
  68  */
  69
  70 #undef  DIAGNOSTIC
  71 #define DIAGNOSTIC 1
  72
  73 #include <sys/param.h>
  74 #include <sys/systm.h>
  75 #include <sys/proc.h>
  76 #include <sys/mount.h>
  77 #include <sys/time.h>
  78 #include <sys/vnode.h>
  79 #include <sys/stat.h>
  80 #include <sys/namei.h>
  81 #include <sys/ucred.h>
  82 #include <sys/buf.h>
  83 #include <sys/errno.h>
  84 #include <sys/malloc.h>
  85 #include <sys/domain.h>
  86 #include <sys/mbuf.h>
  87 #include <sys/syslog.h>
  88 #include <sys/ubc.h>
  89 #include <sys/vm.h>
  90 #include <sys/sysctl.h>
  91
  92 #include <kern/assert.h>
  93
  94 #include <miscfs/specfs/specdev.h>
  95
  96 #include <mach/mach_types.h>
  97 #include <mach/memory_object_types.h>
  98
  99
 100 enum vtype iftovt_tab[16] = {
 101         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
 102         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
 103 };
 104 int     vttoif_tab[9] = {
 105         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
 106         S_IFSOCK, S_IFIFO, S_IFMT,
 107 };
 108
 109 static void vfree(struct vnode *vp);
 110 static void vinactive(struct vnode *vp);
 111 static int vnreclaim(int count);
 112 extern kern_return_t
 113         adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
 114
 115 TAILQ_HEAD(freelst, vnode) vnode_free_list;     /* vnode free list */
 116 TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list;     /* vnode inactive list */
 117 struct mntlist mountlist;                       /* mounted filesystem list */
 118
 #if DIAGNOSTIC
 /*
  * VLISTCHECK(fun, vp, list)
  *      Panic if vp carries the off-list marker (0xdeadb) in
  *      v_freelist.tqe_prev, i.e. it is not linked on any list.
  *      "fun" and "list" are strings used only in the panic message.
  *
  * Wrapped in do { } while(0) so the macro behaves as one statement and
  * cannot capture a following "else".
  */
 #define VLISTCHECK(fun, vp, list)       \
         do {    \
                 if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb)      \
                         panic("%s: %s vnode not on %slist", (fun), (list), (list));     \
         } while(0)

 /*
  * VINACTIVECHECK(fun, vp, expected)
  *      Panic unless the VUINACTIVE state of vp matches "expected".
  *      "expected" is treated as a truth value: callers may pass the
  *      VUINACTIVE mask itself (as VREMINACTIVE does), 0, or any other
  *      zero/non-zero expression.
  */
 #define VINACTIVECHECK(fun, vp, expected)       \
         do {    \
                 int __is_inactive = (ISSET((vp)->v_flag, VUINACTIVE) != 0);     \
                 int __want_inactive = ((expected) != 0);        \
                 if (__is_inactive != __want_inactive)   \
                         panic("%s: %sinactive vnode, expected %s", (fun),       \
                                 __is_inactive? "" : "not ",     \
                                 __want_inactive? "inactive": "not inactive"); \
         } while(0)
 #else
 /* Diagnostics disabled: both checks compile to an empty statement. */
 #define VLISTCHECK(fun, vp, list)               do { } while(0)
 #define VINACTIVECHECK(fun, vp, expected)       do { } while(0)
 #endif /* DIAGNOSTIC */
 136
 /*
  * Off-list marker.  A vnode whose v_freelist.tqe_prev holds this value
  * is linked on neither the free list nor the inactive list.
  */
 #define VLIST_OFFLIST   ((struct vnode **)0xdeadb)

 /* Record that a vnode is on no list. */
 #define VLISTNONE(vp)   \
         do {    \
                 (vp)->v_freelist.tqe_next = (struct vnode *)0;  \
                 (vp)->v_freelist.tqe_prev = VLIST_OFFLIST;      \
         } while(0)

 /* Non-zero when the vnode is currently linked on a list. */
 #define VONLIST(vp)     \
         ((vp)->v_freelist.tqe_prev != VLIST_OFFLIST)
 145
 /*
  * VREMFREE(fun, vp)
  *      Unlink vp from vnode_free_list, mark it as being on no list and
  *      decrement freevnodes.  "fun" names the caller for diagnostics.
  */
 #define VREMFREE(fun, vp)       \
         do {    \
                 VLISTCHECK((fun), (vp), "free");        \
                 TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist);       \
                 VLISTNONE(vp);  \
                 freevnodes--;   \
         } while(0)

 /*
  * VREMINACTIVE(fun, vp)
  *      Unlink vp from vnode_inactive_list, clear VUINACTIVE, mark it as
  *      being on no list and decrement inactivevnodes.  "fun" names the
  *      caller for diagnostics.
  */
 #define VREMINACTIVE(fun, vp)   \
         do {    \
                 VLISTCHECK((fun), (vp), "inactive");    \
                 VINACTIVECHECK((fun), (vp), VUINACTIVE);        \
                 TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist);   \
                 CLR((vp)->v_flag, VUINACTIVE);  \
                 VLISTNONE(vp);  \
                 inactivevnodes--;       \
         } while(0)
 165
 /*
  * VORECLAIM_ENABLE(vp)
  *      Set VORECLAIM on vp; panics if the flag is already set.
  */
 #define VORECLAIM_ENABLE(vp)    \
         do {    \
                 if (ISSET((vp)->v_flag, VORECLAIM)) {   \
                         panic("vm object raclaim already");     \
                 }       \
                 SET((vp)->v_flag, VORECLAIM);   \
         } while(0)

 /*
  * VORECLAIM_DISABLE(vp)
  *      Clear VORECLAIM on vp and, if VXWANT is set, clear it too and
  *      wake up whoever sleeps on the vnode address.
  */
 #define VORECLAIM_DISABLE(vp)   \
         do {    \
                 CLR((vp)->v_flag, VORECLAIM);   \
                 if (ISSET((vp)->v_flag, VXWANT)) {      \
                         CLR((vp)->v_flag, VXWANT);      \
                         wakeup((caddr_t)(vp));  \
                 }       \
         } while(0)
 181
 182 /*
 183  * Have to declare first two locks as actual data even if !MACH_SLOCKS, since
 184  * a pointers to them get passed around.
 185  */
 186 simple_lock_data_t mountlist_slock;
 187 simple_lock_data_t mntvnode_slock;
 188 decl_simple_lock_data(,mntid_slock);
 189 decl_simple_lock_data(,vnode_free_list_slock);
 190 decl_simple_lock_data(,spechash_slock);
 191
 /*
  * vnodetarget is the number of vnodes we expect to get back from the
  * inactive vnode list and the VM object cache.  Because vnreclaim() is
  * mainly a CPU-bound operation, this number could be higher on faster
  * processors.  Setting it too high introduces longer delays in the
  * execution of getnewvnode().
  */
 unsigned long vnodetarget;              /* target for vnreclaim() */
 #define VNODE_FREE_TARGET       20      /* default value for vnodetarget */

 /*
  * Quite a few vnodes are needed on the free list to sustain the rapid
  * stat() calls a compilation makes while still benefiting from the name
  * cache.  With too few vnodes on the free list we cycle through them and
  * cause serious disk thrashing.
  */
 #define VNODE_FREE_MIN          300     /* free list should hold at least this many */

 /*
  * Once this many vnodes have been reused from the free list, vnodes must
  * be pulled back from the VM object cache.  This is essential for
  * caching to stay effective in the name cache and in the buffer cache
  * (for metadata).
  */
 #define VNODE_TOOMANY_REUSED    (VNODE_FREE_MIN/4)

 /*
  * With this many vnodes on the free list there is no need to reclaim
  * vnodes from the VM object cache.
  */
 #define VNODE_FREE_ENOUGH       (VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
 224
 225 /*
 226  * Initialize the vnode management data structures.
 227  */
 228 __private_extern__ void
 229 vntblinit()
 230 {
 231         extern struct lock__bsd__       exchangelock;
 232
 233         simple_lock_init(&mountlist_slock);
 234         simple_lock_init(&mntvnode_slock);
 235         simple_lock_init(&mntid_slock);
 236         simple_lock_init(&spechash_slock);
 237         TAILQ_INIT(&vnode_free_list);
 238         simple_lock_init(&vnode_free_list_slock);
 239         TAILQ_INIT(&vnode_inactive_list);
 240         CIRCLEQ_INIT(&mountlist);
 241     lockinit(&exchangelock, PVFS, "exchange", 0, 0);
 242
 243         if (!vnodetarget)
 244                 vnodetarget = VNODE_FREE_TARGET;
 245
 246         /*
 247          * Scale the vm_object_cache to accomodate the vnodes
 248          * we want to cache
 249          */
 250         (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
 251 }
 252
 253 /* Reset the VM Object Cache with the values passed in */
 254 __private_extern__ kern_return_t
 255 reset_vmobjectcache(unsigned int val1, unsigned int val2)
 256 {
 257         vm_size_t oval = val1 - VNODE_FREE_MIN;
 258         vm_size_t nval;
 259
 260         if(val2 < VNODE_FREE_MIN)
 261                 nval = 0;
 262         else
 263                 nval = val2 - VNODE_FREE_MIN;
 264
 265         return(adjust_vm_object_cache(oval, nval));
 266 }
 267
 268 /*
 269  * Mark a mount point as busy. Used to synchronize access and to delay
 270  * unmounting. Interlock is not released on failure.
 271  */
 272 int
 273 vfs_busy(mp, flags, interlkp, p)
 274         struct mount *mp;
 275         int flags;
 276         struct slock *interlkp;
 277         struct proc *p;
 278 {
 279         int lkflags;
 280
 281         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 282                 if (flags & LK_NOWAIT)
 283                         return (ENOENT);
 284                 mp->mnt_kern_flag |= MNTK_MWAIT;
 285                 if (interlkp)
 286                         simple_unlock(interlkp);
 287                 /*
 288                  * Since all busy locks are shared except the exclusive
 289                  * lock granted when unmounting, the only place that a
 290                  * wakeup needs to be done is at the release of the
 291                  * exclusive lock at the end of dounmount.
 292                  */
 293                 sleep((caddr_t)mp, PVFS);
 294                 if (interlkp)
 295                         simple_lock(interlkp);
 296                 return (ENOENT);
 297         }
 298         lkflags = LK_SHARED;
 299         if (interlkp)
 300                 lkflags |= LK_INTERLOCK;
 301         if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
 302                 panic("vfs_busy: unexpected lock failure");
 303         return (0);
 304 }
 305
 306 /*
 307  * Free a busy filesystem.
 308  */
 309 void
 310 vfs_unbusy(mp, p)
 311         struct mount *mp;
 312         struct proc *p;
 313 {
 314
 315         lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
 316 }
 317
 318 /*
 319  * Lookup a filesystem type, and if found allocate and initialize
 320  * a mount structure for it.
 321  *
 322  * Devname is usually updated by mount(8) after booting.
 323  */
 324 int
 325 vfs_rootmountalloc(fstypename, devname, mpp)
 326         char *fstypename;
 327         char *devname;
 328         struct mount **mpp;
 329 {
 330         struct proc *p = current_proc();        /* XXX */
 331         struct vfsconf *vfsp;
 332         struct mount *mp;
 333
 334         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 335                 if (!strcmp(vfsp->vfc_name, fstypename))
 336                         break;
 337         if (vfsp == NULL)
 338                 return (ENODEV);
 339         mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
 340         bzero((char *)mp, (u_long)sizeof(struct mount));
 341
 342     /* Initialize the default IO constraints */
 343     mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 344     mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 345
 346         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
 347         (void)vfs_busy(mp, LK_NOWAIT, 0, p);
 348         LIST_INIT(&mp->mnt_vnodelist);
 349         mp->mnt_vfc = vfsp;
 350         mp->mnt_op = vfsp->vfc_vfsops;
 351         mp->mnt_flag = MNT_RDONLY;
 352         mp->mnt_vnodecovered = NULLVP;
 353         vfsp->vfc_refcount++;
 354         mp->mnt_stat.f_type = vfsp->vfc_typenum;
 355         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 356         strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
 357         mp->mnt_stat.f_mntonname[0] = '/';
 358         (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
 359         *mpp = mp;
 360         return (0);
 361 }
 362
 363 /*
 364  * Find an appropriate filesystem to use for the root. If a filesystem
 365  * has not been preselected, walk through the list of known filesystems
 366  * trying those that have mountroot routines, and try them until one
 367  * works or we have tried them all.
 368  */
 369 int
 370 vfs_mountroot()
 371 {
 372         struct vfsconf *vfsp;
 373         extern int (*mountroot)(void);
 374         int error;
 375
 376         if (mountroot != NULL) {
 377                 error = (*mountroot)();
 378                 return (error);
 379         }
 380
 381         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
 382                 if (vfsp->vfc_mountroot == NULL)
 383                         continue;
 384                 if ((error = (*vfsp->vfc_mountroot)()) == 0)
 385                         return (0);
 386                 if (error != EINVAL)
 387                         printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
 388         }
 389         return (ENODEV);
 390 }
 391
 392 /*
 393  * Lookup a mount point by filesystem identifier.
 394  */
 395 struct mount *
 396 vfs_getvfs(fsid)
 397         fsid_t *fsid;
 398 {
 399         register struct mount *mp;
 400
 401         simple_lock(&mountlist_slock);
 402         for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
 403              mp = mp->mnt_list.cqe_next) {
 404                 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
 405                     mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
 406                         simple_unlock(&mountlist_slock);
 407                         return (mp);
 408                 }
 409         }
 410         simple_unlock(&mountlist_slock);
 411         return ((struct mount *)0);
 412 }
 413
 414 /*
 415  * Get a new unique fsid
 416  */
 417 void
 418 vfs_getnewfsid(mp)
 419         struct mount *mp;
 420 {
 421 static u_short xxxfs_mntid;
 422
 423         fsid_t tfsid;
 424         int mtype;
 425
 426         simple_lock(&mntid_slock);
 427         mtype = mp->mnt_vfc->vfc_typenum;
 428         mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
 429         mp->mnt_stat.f_fsid.val[1] = mtype;
 430         if (xxxfs_mntid == 0)
 431                 ++xxxfs_mntid;
 432         tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
 433         tfsid.val[1] = mtype;
 434         if (mountlist.cqh_first != (void *)&mountlist) {
 435                 while (vfs_getvfs(&tfsid)) {
 436                         tfsid.val[0]++;
 437                         xxxfs_mntid++;
 438                 }
 439         }
 440         mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
 441         simple_unlock(&mntid_slock);
 442 }
 443
 444 /*
 445  * Set vnode attributes to VNOVAL
 446  */
 447 void
 448 vattr_null(vap)
 449         register struct vattr *vap;
 450 {
 451
 452         vap->va_type = VNON;
 453         vap->va_size = vap->va_bytes = VNOVAL;
 454         vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
 455                 vap->va_fsid = vap->va_fileid =
 456                 vap->va_blocksize = vap->va_rdev =
 457                 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
 458                 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
 459                 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
 460                 vap->va_flags = vap->va_gen = VNOVAL;
 461         vap->va_vaflags = 0;
 462 }
 463
 /*
  * Routines and counters for the management of the vnode table.
  */
 extern int (**dead_vnodeop_p)(void *);
 static void vclean(struct vnode *vp, int flag, struct proc *p);
 extern void vgonel(struct vnode *vp, struct proc *p);

 long numvnodes;                 /* incremented when getnewvnode() allocates */
 long freevnodes;                /* decremented by VREMFREE */
 long inactivevnodes;            /* decremented by VREMINACTIVE */
 long vnode_reclaim_tried;
 long vnode_objects_reclaimed;


 extern struct vattr va_null;
 477
 478 /*
 479  * Return the next vnode from the free list.
 480  */
 481 int
 482 getnewvnode(tag, mp, vops, vpp)
 483         enum vtagtype tag;
 484         struct mount *mp;
 485         int (**vops)(void *);
 486         struct vnode **vpp;
 487 {
 488         struct proc *p = current_proc();        /* XXX */
 489         struct vnode *vp;
 490         int cnt, didretry = 0;
 491         static int reused = 0;                          /* track the reuse rate */
 492         int reclaimhits = 0;
 493
 494 retry:
 495         simple_lock(&vnode_free_list_slock);
 496         /*
 497          * MALLOC a vnode if the number of vnodes has not reached the desired
 498          * value and the number on the free list is still reasonable...
 499          * reuse from the freelist even though we may evict a name cache entry
 500          * to reduce the number of vnodes that accumulate.... vnodes tie up
 501          * wired memory and are never garbage collected
 502          */
 503         if (numvnodes < desiredvnodes && (freevnodes < (2 * VNODE_FREE_MIN))) {
 504                 numvnodes++;
 505                 simple_unlock(&vnode_free_list_slock);
 506                 MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
 507                 bzero((char *)vp, sizeof *vp);
 508                 VLISTNONE(vp);          /* avoid double queue removal */
 509                 simple_lock_init(&vp->v_interlock);
 510                 goto done;
 511         }
 512
 513         /*
 514          * Once the desired number of vnodes are allocated,
 515          * we start reusing the vnodes.
 516          */
 517         if (freevnodes < VNODE_FREE_MIN) {
 518                 /*
 519                  * if we are low on vnodes on the freelist attempt to get
 520                  * some back from the inactive list and VM object cache
 521                  */
 522                 simple_unlock(&vnode_free_list_slock);
 523                 (void)vnreclaim(vnodetarget);
 524                 simple_lock(&vnode_free_list_slock);
 525         }
 526         if (numvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED) {
 527                 reused = 0;
 528                 if (freevnodes < VNODE_FREE_ENOUGH) {
 529                         simple_unlock(&vnode_free_list_slock);
 530                         (void)vnreclaim(vnodetarget);
 531                         simple_lock(&vnode_free_list_slock);
 532                 }
 533         }
 534
 535         for (cnt = 0, vp = vnode_free_list.tqh_first;
 536                         vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
 537                 if (simple_lock_try(&vp->v_interlock)) {
 538                         /* got the interlock */
 539                         if (ISSET(vp->v_flag, VORECLAIM)) {
 540                                 /* skip over the vnodes that are being reclaimed */
 541                                 simple_unlock(&vp->v_interlock);
 542                                 reclaimhits++;
 543                         } else
 544                         break;
 545         }
 546         }
 547
 548         /*
 549          * Unless this is a bad time of the month, at most
 550          * the first NCPUS items on the free list are
 551          * locked, so this is close enough to being empty.
 552          */
 553         if (vp == NULLVP) {
 554                 simple_unlock(&vnode_free_list_slock);
 555                 if (!(didretry++) && (vnreclaim(vnodetarget) > 0))
 556                         goto retry;
 557                 tablefull("vnode");
 558                 log(LOG_EMERG, "%d vnodes locked, %d desired, %d numvnodes, "
 559                         "%d free, %d inactive, %d being reclaimed\n",
 560                         cnt, desiredvnodes, numvnodes, freevnodes, inactivevnodes,
 561                         reclaimhits);
 562                 *vpp = 0;
 563                 return (ENFILE);
 564         }
 565
 566         if (vp->v_usecount)
 567                 panic("free vnode isn't: v_type = %d, v_usecount = %d?",
 568                                 vp->v_type, vp->v_usecount);
 569
 570         VREMFREE("getnewvnode", vp);
 571         reused++;
 572         simple_unlock(&vnode_free_list_slock);
 573         vp->v_lease = NULL;
 574         cache_purge(vp);
 575         if (vp->v_type != VBAD)
 576                 vgonel(vp, p);  /* clean and reclaim the vnode */
 577         else
 578                 simple_unlock(&vp->v_interlock);
 579 #if DIAGNOSTIC
 580         if (vp->v_data)
 581                 panic("cleaned vnode isn't");
 582         {
 583         int s = splbio();
 584         if (vp->v_numoutput)
 585                 panic("Clean vnode has pending I/O's");
 586         splx(s);
 587         }
 588 #endif
 589         if (UBCINFOEXISTS(vp))
 590                 panic("getnewvnode: ubcinfo not cleaned");
 591         else
 592                 vp->v_ubcinfo = 0;
 593
 594         vp->v_lastr = -1;
 595         vp->v_ralen = 0;
 596         vp->v_maxra = 0;
 597         vp->v_lastw = 0;
 598         vp->v_ciosiz = 0;
 599         vp->v_cstart = 0;
 600         vp->v_clen = 0;
 601         vp->v_socket = 0;
 602
 603 done:
 604         vp->v_flag = VSTANDARD;
 605         vp->v_type = VNON;
 606         vp->v_tag = tag;
 607         vp->v_op = vops;
 608         insmntque(vp, mp);
 609         *vpp = vp;
 610         vp->v_usecount = 1;
 611         vp->v_data = 0;
 612         return (0);
 613 }
 614
 615 /*
 616  * Move a vnode from one mount queue to another.
 617  */
 618 void
 619 insmntque(vp, mp)
 620         struct vnode *vp;
 621         struct mount *mp;
 622 {
 623
 624         simple_lock(&mntvnode_slock);
 625         /*
 626          * Delete from old mount point vnode list, if on one.
 627          */
 628         if (vp->v_mount != NULL)
 629                 LIST_REMOVE(vp, v_mntvnodes);
 630         /*
 631          * Insert into list of vnodes for the new mount point, if available.
 632          */
 633         if ((vp->v_mount = mp) != NULL)
 634                 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
 635         simple_unlock(&mntvnode_slock);
 636 }
 637
 638 __inline void
 639 vpwakeup(struct vnode *vp)
 640 {
 641         if (vp) {
 642                 if (--vp->v_numoutput < 0)
 643                         panic("vpwakeup: neg numoutput");
 644                 if ((vp->v_flag & VBWAIT || vp->v_flag & VTHROTTLED)
 645                     && vp->v_numoutput <= 0) {
 646                         vp->v_flag &= ~(VBWAIT|VTHROTTLED);
 647                         wakeup((caddr_t)&vp->v_numoutput);
 648                 }
 649         }
 650 }
 651
 652 /*
 653  * Update outstanding I/O count and do wakeup if requested.
 654  */
 655 void
 656 vwakeup(bp)
 657         register struct buf *bp;
 658 {
 659         CLR(bp->b_flags, B_WRITEINPROG);
 660         vpwakeup(bp->b_vp);
 661 }
 662
 663 /*
 664  * Flush out and invalidate all buffers associated with a vnode.
 665  * Called with the underlying object locked.
 666  */
 667 int
 668 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
 669         register struct vnode *vp;
 670         int flags;
 671         struct ucred *cred;
 672         struct proc *p;
 673         int slpflag, slptimeo;
 674 {
 675         register struct buf *bp;
 676         struct buf *nbp, *blist;
 677         int s, error = 0;
 678
 679         if (flags & V_SAVE) {
 680                 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
 681                         return (error);
 682                 }
 683
 684                 // XXXdbg - if there are dirty bufs, wait for 'em if they're busy
 685                 for (bp=vp->v_dirtyblkhd.lh_first; bp; bp=nbp) {
 686                     nbp = bp->b_vnbufs.le_next;
 687                     if (ISSET(bp->b_flags, B_BUSY)) {
 688                         SET(bp->b_flags, B_WANTED);
 689                         tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), "vinvalbuf", 0);
 690                         nbp = vp->v_dirtyblkhd.lh_first;
 691                     } else {
 692                         panic("vinvalbuf: dirty buf (vp 0x%x, bp 0x%x)", vp, bp);
 693                     }
 694                 }
 695         }
 696
 697         for (;;) {
 698                 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
 699                         while (blist && blist->b_lblkno < 0)
 700                                 blist = blist->b_vnbufs.le_next;
 701                 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
 702                     (flags & V_SAVEMETA))
 703                         while (blist && blist->b_lblkno < 0)
 704                                 blist = blist->b_vnbufs.le_next;
 705                 if (!blist)
 706                         break;
 707
 708                 for (bp = blist; bp; bp = nbp) {
 709                         nbp = bp->b_vnbufs.le_next;
 710                         if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
 711                                 continue;
 712                         s = splbio();
 713                         if (ISSET(bp->b_flags, B_BUSY)) {
 714                                 SET(bp->b_flags, B_WANTED);
 715                                 error = tsleep((caddr_t)bp,
 716                                         slpflag | (PRIBIO + 1), "vinvalbuf",
 717                                         slptimeo);
 718                                 splx(s);
 719                                 if (error) {
 720                                         return (error);
 721                                 }
 722                                 break;
 723                         }
 724                         bremfree(bp);
 725                         SET(bp->b_flags, B_BUSY);
 726                         splx(s);
 727                         /*
 728                          * XXX Since there are no node locks for NFS, I believe
 729                          * there is a slight chance that a delayed write will
 730                          * occur while sleeping just above, so check for it.
 731                          */
 732                         if (ISSET(bp->b_flags, B_DELWRI) && (flags & V_SAVE)) {
 733                                 (void) VOP_BWRITE(bp);
 734                                 break;
 735                         }
 736
 737                         if (bp->b_flags & B_LOCKED) {
 738                                 panic("vinvalbuf: bp @ 0x%x is locked!\n", bp);
 739                                 break;
 740                         } else {
 741                                 SET(bp->b_flags, B_INVAL);
 742                         }
 743                         brelse(bp);
 744                 }
 745         }
 746         if (!(flags & V_SAVEMETA) &&
 747             (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
 748                 panic("vinvalbuf: flush failed");
 749         return (0);
 750 }
 751
 752 /*
 753  * Create a vnode for a block device.
 754  * Used for root filesystem, argdev, and swap areas.
 755  * Also used for memory file system special devices.
 756  */
 757 int
 758 bdevvp(dev, vpp)
 759         dev_t dev;
 760         struct vnode **vpp;
 761 {
 762         register struct vnode *vp;
 763         struct vnode *nvp;
 764         int error;
 765
 766         if (dev == NODEV) {
 767                 *vpp = NULLVP;
 768                 return (ENODEV);
 769         }
 770         error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
 771         if (error) {
 772                 *vpp = NULLVP;
 773                 return (error);
 774         }
 775         vp = nvp;
 776         vp->v_type = VBLK;
 777         if (nvp = checkalias(vp, dev, (struct mount *)0)) {
 778                 vput(vp);
 779                 vp = nvp;
 780         }
 781         *vpp = vp;
 782         return (0);
 783 }
 784
 785 /*
 786  * Check to see if the new vnode represents a special device
 787  * for which we already have a vnode (either because of
 788  * bdevvp() or because of a different vnode representing
 789  * the same block device). If such an alias exists, deallocate
 790  * the existing contents and return the aliased vnode. The
 791  * caller is responsible for filling it with its new contents.
 792  */
 793 struct vnode *
 794 checkalias(nvp, nvp_rdev, mp)
 795         register struct vnode *nvp;
 796         dev_t nvp_rdev;
 797         struct mount *mp;
 798 {
 799         struct proc *p = current_proc();        /* XXX */
 800         struct vnode *vp;
 801         struct vnode **vpp;
 802         struct specinfo * bufhold;
 803         int buffree = 1;
 804
 805         if (nvp->v_type != VBLK && nvp->v_type != VCHR)
 806                 return (NULLVP);
 807
 808         bufhold = (struct specinfo *)_MALLOC_ZONE(sizeof(struct specinfo),
 809                         M_VNODE, M_WAITOK);
 810         vpp = &speclisth[SPECHASH(nvp_rdev)];
 811 loop:
 812         simple_lock(&spechash_slock);
 813         for (vp = *vpp; vp; vp = vp->v_specnext) {
 814                 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
 815                         continue;
 816                 /*
 817                  * Alias, but not in use, so flush it out.
 818                  */
 819                 simple_lock(&vp->v_interlock);
 820                 if (vp->v_usecount == 0) {
 821                         simple_unlock(&spechash_slock);
 822                         vgonel(vp, p);
 823                         goto loop;
 824                 }
 825                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
 826                         simple_unlock(&spechash_slock);
 827                         goto loop;
 828                 }
 829                 break;
 830         }
 831         if (vp == NULL || vp->v_tag != VT_NON) {
 832                 nvp->v_specinfo = bufhold;
 833                 buffree = 0;    /* buffer used */
 834                 bzero(nvp->v_specinfo, sizeof(struct specinfo));
 835                 nvp->v_rdev = nvp_rdev;
 836                 nvp->v_hashchain = vpp;
 837                 nvp->v_specnext = *vpp;
 838                 nvp->v_specflags = 0;
 839                 simple_unlock(&spechash_slock);
 840                 *vpp = nvp;
 841                 if (vp != NULLVP) {
 842                         nvp->v_flag |= VALIASED;
 843                         vp->v_flag |= VALIASED;
 844                         vput(vp);
 845                 }
 846                 /* Since buffer is used just return */
 847                 return (NULLVP);
 848         }
 849         simple_unlock(&spechash_slock);
 850         VOP_UNLOCK(vp, 0, p);
 851         simple_lock(&vp->v_interlock);
 852         vclean(vp, 0, p);
 853         vp->v_op = nvp->v_op;
 854         vp->v_tag = nvp->v_tag;
 855         nvp->v_type = VNON;
 856         insmntque(vp, mp);
 857         if (buffree)
 858                 _FREE_ZONE((void *)bufhold, sizeof (struct specinfo), M_VNODE);
 859         return (vp);
 860 }
 861
 862 /*
 863  * Get a reference on a particular vnode and lock it if requested.
 864  * If the vnode was on the inactive list, remove it from the list.
 865  * If the vnode was on the free list, remove it from the list and
 866  * move it to inactive list as needed.
 867  * The vnode lock bit is set if the vnode is being eliminated in
 868  * vgone. The process is awakened when the transition is completed,
 869  * and an error returned to indicate that the vnode is no longer
 870  * usable (possibly having been changed to a new file system type).
 871  */
 872 int
 873 vget(vp, flags, p)
 874         struct vnode *vp;
 875         int flags;
 876         struct proc *p;
 877 {
 878         int error = 0;
 879
 880 retry:
 881
 882         /*
 883          * If the vnode is in the process of being cleaned out for
 884          * another use, we wait for the cleaning to finish and then
 885          * return failure. Cleaning is determined by checking that
 886          * the VXLOCK flag is set.
 887          */
 888         if ((flags & LK_INTERLOCK) == 0)
 889                 simple_lock(&vp->v_interlock);
 890         if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
 891                 vp->v_flag |= VXWANT;
 892                 simple_unlock(&vp->v_interlock);
 893                 (void)tsleep((caddr_t)vp, PINOD, "vget", 0);
 894                 return (ENOENT);
 895         }
 896
 897         /*
 898          * vnode is being terminated.
 899          * wait for vnode_pager_no_senders() to clear VTERMINATE
 900          */
 901         if (ISSET(vp->v_flag, VTERMINATE)) {
 902                 SET(vp->v_flag, VTERMWANT);
 903                 simple_unlock(&vp->v_interlock);
 904                 (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
 905                 return (ENOENT);
 906         }
 907
 908         /*
 909          * if the vnode is being initialized,
 910          * wait for it to finish initialization
 911          */
 912         if (ISSET(vp->v_flag,  VUINIT)) {
 913                 if (ISSET(vp->v_flag,  VUINIT)) {
 914                         SET(vp->v_flag, VUWANT);
 915                         simple_unlock(&vp->v_interlock);
 916                         (void) tsleep((caddr_t)vp, PINOD, "vget2", 0);
 917                         goto retry;
 918                 }
 919         }
 920
 921         simple_lock(&vnode_free_list_slock);
 922         if (vp->v_usecount == 0) {
 923                 /* If on the free list, remove it from there */
 924                 if (VONLIST(vp))
 925                         VREMFREE("vget", vp);
 926         } else {
 927                 /* If on the inactive list, remove it from there */
 928                 if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
 929                         if (VONLIST(vp))
 930                                 VREMINACTIVE("vget", vp);
 931                 }
 932         }
 933
 934         /* The vnode should not be on the inactive list here */
 935         VINACTIVECHECK("vget", vp, 0);
 936
 937         simple_unlock(&vnode_free_list_slock);
 938
 939         if (++vp->v_usecount <= 0)
 940                 panic("vget: v_usecount");
 941
 942         /*
 943          * Recover named reference as needed
 944          */
 945         if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) {
 946                 simple_unlock(&vp->v_interlock);
 947                 if (ubc_getobject(vp, UBC_HOLDOBJECT)) {
 948                         error = ENOENT;
 949                         goto errout;
 950                 }
 951                 simple_lock(&vp->v_interlock);
 952         }
 953
 954         if (flags & LK_TYPE_MASK) {
 955                 if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
 956                         goto errout;
 957                 return (0);
 958         }
 959
 960         if ((flags & LK_INTERLOCK) == 0)
 961                 simple_unlock(&vp->v_interlock);
 962         return (0);
 963
 964 errout:
 965         /*
 966          * If the vnode was not active in the first place
 967          * must not call vrele() as VOP_INACTIVE() is not
 968          * required.
 969          * So inlined part of vrele() here.
 970          */
 971         simple_lock(&vp->v_interlock);
 972         if (--vp->v_usecount == 1) {
 973                 if (UBCINFOEXISTS(vp)) {
 974                         vinactive(vp);
 975                         simple_unlock(&vp->v_interlock);
 976                         return (error);
 977                 }
 978         }
 979         if (vp->v_usecount > 0) {
 980                 simple_unlock(&vp->v_interlock);
 981                 return (error);
 982         }
 983         if (vp->v_usecount < 0)
 984                 panic("vget: negative usecount (%d)", vp->v_usecount);
 985         vfree(vp);
 986         simple_unlock(&vp->v_interlock);
 987         return (error);
 988 }
 989
 990 /*
 991  * Get a pager reference on the particular vnode.
 992  *
 993  * This is called from ubc_info_init() and it is asumed that
 994  * the vnode is neither on the free list on on the inactive list.
 995  * It is also assumed that the vnode is neither being recycled
 996  * by vgonel nor being terminated by vnode_pager_vrele().
 997  *
 998  * The vnode interlock is NOT held by the caller.
 999  */
1000 __private_extern__ int
1001 vnode_pager_vget(vp)
1002         struct vnode *vp;
1003 {
1004         simple_lock(&vp->v_interlock);
1005         if (UBCINFOMISSING(vp))
1006                 panic("vnode_pager_vget: stolen ubc_info");
1007
1008         if (!UBCINFOEXISTS(vp))
1009                 panic("vnode_pager_vget: lost ubc_info");
1010
1011         if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM))
1012                 panic("vnode_pager_vget: already being reclaimd");
1013
1014         if (ISSET(vp->v_flag, VTERMINATE))
1015                 panic("vnode_pager_vget: already being terminated");
1016
1017         simple_lock(&vnode_free_list_slock);
1018         /* The vnode should not be on ANY list */
1019         if (VONLIST(vp))
1020                 panic("vnode_pager_vget: still on the list");
1021
1022         /* The vnode should not be on the inactive list here */
1023         VINACTIVECHECK("vnode_pager_vget", vp, 0);
1024         simple_unlock(&vnode_free_list_slock);
1025
1026         /* After all those checks, now do the real work :-) */
1027         if (++vp->v_usecount <= 0)
1028                 panic("vnode_pager_vget: v_usecount");
1029         simple_unlock(&vp->v_interlock);
1030
1031         return (0);
1032 }
1033
1034 /*
1035  * Stubs to use when there is no locking to be done on the underlying object.
1036  * A minimal shared lock is necessary to ensure that the underlying object
1037  * is not revoked while an operation is in progress. So, an active shared
1038  * count is maintained in an auxillary vnode lock structure.
1039  */
1040 int
1041 vop_nolock(ap)
1042         struct vop_lock_args /* {
1043                 struct vnode *a_vp;
1044                 int a_flags;
1045                 struct proc *a_p;
1046         } */ *ap;
1047 {
1048 #ifdef notyet
1049         /*
1050          * This code cannot be used until all the non-locking filesystems
1051          * (notably NFS) are converted to properly lock and release nodes.
1052          * Also, certain vnode operations change the locking state within
1053          * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
1054          * and symlink). Ideally these operations should not change the
1055          * lock state, but should be changed to let the caller of the
1056          * function unlock them. Otherwise all intermediate vnode layers
1057          * (such as union, umapfs, etc) must catch these functions to do
1058          * the necessary locking at their layer. Note that the inactive
1059          * and lookup operations also change their lock state, but this
1060          * cannot be avoided, so these two operations will always need
1061          * to be handled in intermediate layers.
1062          */
1063         struct vnode *vp = ap->a_vp;
1064         int vnflags, flags = ap->a_flags;
1065
1066         if (vp->v_vnlock == NULL) {
1067                 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1068                         return (0);
1069                 MALLOC_ZONE(vp->v_vnlock, struct lock__bsd__ *,
1070                                 sizeof(struct lock__bsd__), M_VNODE, M_WAITOK);
1071                 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1072         }
1073         switch (flags & LK_TYPE_MASK) {
1074         case LK_DRAIN:
1075                 vnflags = LK_DRAIN;
1076                 break;
1077         case LK_EXCLUSIVE:
1078         case LK_SHARED:
1079                 vnflags = LK_SHARED;
1080                 break;
1081         case LK_UPGRADE:
1082         case LK_EXCLUPGRADE:
1083         case LK_DOWNGRADE:
1084                 return (0);
1085         case LK_RELEASE:
1086         default:
1087                 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
1088         }
1089         if (flags & LK_INTERLOCK)
1090                 vnflags |= LK_INTERLOCK;
1091         return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
1092 #else /* for now */
1093         /*
1094          * Since we are not using the lock manager, we must clear
1095          * the interlock here.
1096          */
1097         if (ap->a_flags & LK_INTERLOCK)
1098                 simple_unlock(&ap->a_vp->v_interlock);
1099         return (0);
1100 #endif
1101 }
1102
1103 /*
1104  * Decrement the active use count.
1105  */
1106 int
1107 vop_nounlock(ap)
1108         struct vop_unlock_args /* {
1109                 struct vnode *a_vp;
1110                 int a_flags;
1111                 struct proc *a_p;
1112         } */ *ap;
1113 {
1114         struct vnode *vp = ap->a_vp;
1115
1116         if (vp->v_vnlock == NULL)
1117                 return (0);
1118         return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
1119 }
1120
1121 /*
1122  * Return whether or not the node is in use.
1123  */
1124 int
1125 vop_noislocked(ap)
1126         struct vop_islocked_args /* {
1127                 struct vnode *a_vp;
1128         } */ *ap;
1129 {
1130         struct vnode *vp = ap->a_vp;
1131
1132         if (vp->v_vnlock == NULL)
1133                 return (0);
1134         return (lockstatus(vp->v_vnlock));
1135 }
1136
1137 /*
1138  * Vnode reference.
1139  */
1140 void
1141 vref(vp)
1142         struct vnode *vp;
1143 {
1144
1145         simple_lock(&vp->v_interlock);
1146         if (vp->v_usecount <= 0)
1147                 panic("vref used where vget required");
1148
1149         /* If on the inactive list, remove it from there */
1150         if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
1151                 if (VONLIST(vp)) {
1152                         simple_lock(&vnode_free_list_slock);
1153                         VREMINACTIVE("vref", vp);
1154                         simple_unlock(&vnode_free_list_slock);
1155                 }
1156         }
1157         /* The vnode should not be on the inactive list here */
1158         VINACTIVECHECK("vref", vp, 0);
1159
1160         if (++vp->v_usecount <= 0)
1161                 panic("vref v_usecount");
1162         simple_unlock(&vp->v_interlock);
1163 }
1164
1165 /*
1166  * put the vnode on appropriate free list.
1167  * called with v_interlock held.
1168  */
1169 static void
1170 vfree(vp)
1171         struct vnode *vp;
1172 {
1173         /*
1174          * if the vnode is not obtained by calling getnewvnode() we
1175          * are not responsible for the cleanup. Just return.
1176          */
1177         if (!(vp->v_flag & VSTANDARD)) {
1178                 return;
1179         }
1180
1181         if (vp->v_usecount != 0)
1182                 panic("vfree: v_usecount");
1183
1184         /* insert at tail of LRU list or at head if VAGE is set */
1185         simple_lock(&vnode_free_list_slock);
1186
1187         if (VONLIST(vp))
1188                  panic("vfree: vnode still on list");
1189
1190         if (vp->v_flag & VAGE) {
1191                 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1192                 vp->v_flag &= ~VAGE;
1193         } else
1194                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1195         freevnodes++;
1196         simple_unlock(&vnode_free_list_slock);
1197         return;
1198 }
1199
1200 /*
1201  * put the vnode on the inactive list.
1202  * called with v_interlock held
1203  */
1204 static void
1205 vinactive(vp)
1206         struct vnode *vp;
1207 {
1208         if (!UBCINFOEXISTS(vp))
1209                 panic("vinactive: not a UBC vnode");
1210
1211         if (vp->v_usecount != 1)
1212                 panic("vinactive: v_usecount");
1213
1214         simple_lock(&vnode_free_list_slock);
1215
1216         if (VONLIST(vp))
1217                  panic("vinactive: vnode still on list");
1218         VINACTIVECHECK("vinactive", vp, 0);
1219
1220         TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
1221         SET(vp->v_flag, VUINACTIVE);
1222         CLR(vp->v_flag, (VNOCACHE_DATA | VRAOFF));
1223
1224         inactivevnodes++;
1225         simple_unlock(&vnode_free_list_slock);
1226         return;
1227 }
1228
1229
1230 /*
1231  * vput(), just unlock and vrele()
1232  */
1233 void
1234 vput(vp)
1235         struct vnode *vp;
1236 {
1237         struct proc *p = current_proc();        /* XXX */
1238
1239         simple_lock(&vp->v_interlock);
1240         if (--vp->v_usecount == 1) {
1241                 if (UBCINFOEXISTS(vp)) {
1242                         vinactive(vp);
1243                         simple_unlock(&vp->v_interlock);
1244                         VOP_UNLOCK(vp, 0, p);
1245                         return;
1246                 }
1247         }
1248         if (vp->v_usecount > 0) {
1249                 simple_unlock(&vp->v_interlock);
1250                 VOP_UNLOCK(vp, 0, p);
1251                 return;
1252         }
1253 #if DIAGNOSTIC
1254         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1255                 vprint("vput: bad ref count", vp);
1256                 panic("vput: v_usecount = %d, v_writecount = %d",
1257                         vp->v_usecount, vp->v_writecount);
1258         }
1259 #endif
1260         if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1261                 VREMINACTIVE("vrele", vp);
1262
1263         simple_unlock(&vp->v_interlock);
1264         VOP_INACTIVE(vp, p);
1265         /*
1266          * The interlock is not held and
1267          * VOP_INCATIVE releases the vnode lock.
1268          * We could block and the vnode might get reactivated
1269          * Can not just call vfree without checking the state
1270          */
1271         simple_lock(&vp->v_interlock);
1272         if (!VONLIST(vp)) {
1273                 if (vp->v_usecount == 0)
1274                         vfree(vp);
1275                 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1276                         vinactive(vp);
1277         }
1278         simple_unlock(&vp->v_interlock);
1279 }
1280
1281 /*
1282  * Vnode release.
1283  * If count drops to zero, call inactive routine and return to freelist.
1284  */
1285 void
1286 vrele(vp)
1287         struct vnode *vp;
1288 {
1289         struct proc *p = current_proc();        /* XXX */
1290
1291         simple_lock(&vp->v_interlock);
1292         if (--vp->v_usecount == 1) {
1293                 if (UBCINFOEXISTS(vp)) {
1294                         vinactive(vp);
1295                         simple_unlock(&vp->v_interlock);
1296                         return;
1297                 }
1298         }
1299         if (vp->v_usecount > 0) {
1300                 simple_unlock(&vp->v_interlock);
1301                 return;
1302         }
1303 #if DIAGNOSTIC
1304         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1305                 vprint("vrele: bad ref count", vp);
1306                 panic("vrele: ref cnt");
1307         }
1308 #endif
1309         if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1310                 VREMINACTIVE("vrele", vp);
1311
1312
1313         if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
1314                 /* vnode is being cleaned, just return */
1315                 vfree(vp);
1316                 simple_unlock(&vp->v_interlock);
1317                 return;
1318         }
1319
1320         if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1321                 VOP_INACTIVE(vp, p);
1322                 /*
1323                  * vn_lock releases the interlock and
1324                  * VOP_INCATIVE releases the vnode lock.
1325                  * We could block and the vnode might get reactivated
1326                  * Can not just call vfree without checking the state
1327                  */
1328                 simple_lock(&vp->v_interlock);
1329                 if (!VONLIST(vp)) {
1330                         if (vp->v_usecount == 0)
1331                                 vfree(vp);
1332                         else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1333                                 vinactive(vp);
1334                 }
1335                 simple_unlock(&vp->v_interlock);
1336         }
1337 #if 0
1338         else {
1339                 vfree(vp);
1340                 simple_unlock(&vp->v_interlock);
1341                 kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp);
1342         }
1343 #endif
1344 }
1345
1346 void
1347 vagevp(vp)
1348         struct vnode *vp;
1349 {
1350         simple_lock(&vp->v_interlock);
1351         vp->v_flag |= VAGE;
1352         simple_unlock(&vp->v_interlock);
1353         return;
1354 }
1355
1356 /*
1357  * Page or buffer structure gets a reference.
1358  */
1359 void
1360 vhold(vp)
1361         register struct vnode *vp;
1362 {
1363
1364         simple_lock(&vp->v_interlock);
1365         vp->v_holdcnt++;
1366         simple_unlock(&vp->v_interlock);
1367 }
1368
1369 /*
1370  * Page or buffer structure frees a reference.
1371  */
1372 void
1373 holdrele(vp)
1374         register struct vnode *vp;
1375 {
1376
1377         simple_lock(&vp->v_interlock);
1378         if (vp->v_holdcnt <= 0)
1379                 panic("holdrele: holdcnt");
1380         vp->v_holdcnt--;
1381         simple_unlock(&vp->v_interlock);
1382 }
1383
1384 /*
1385  * Remove any vnodes in the vnode table belonging to mount point mp.
1386  *
1387  * If MNT_NOFORCE is specified, there should not be any active ones,
1388  * return error if any are found (nb: this is a user error, not a
1389  * system error). If MNT_FORCE is specified, detach any active vnodes
1390  * that are found.
1391  */
1392 #if DIAGNOSTIC
1393 int busyprt = 0;        /* print out busy vnodes */
1394 #if 0
1395 struct ctldebug debug1 = { "busyprt", &busyprt };
1396 #endif /* 0 */
1397 #endif
1398
1399 int
1400 vflush(mp, skipvp, flags)
1401         struct mount *mp;
1402         struct vnode *skipvp;
1403         int flags;
1404 {
1405         struct proc *p = current_proc();
1406         struct vnode *vp, *nvp;
1407         int busy = 0;
1408
1409         simple_lock(&mntvnode_slock);
1410 loop:
1411         for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1412                 if (vp->v_mount != mp)
1413                         goto loop;
1414                 nvp = vp->v_mntvnodes.le_next;
1415                 /*
1416                  * Skip over a selected vnode.
1417                  */
1418                 if (vp == skipvp)
1419                         continue;
1420
1421                 simple_lock(&vp->v_interlock);
1422                 /*
1423                  * Skip over a vnodes marked VSYSTEM or VNOFLUSH.
1424                  */
1425                 if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) {
1426                         simple_unlock(&vp->v_interlock);
1427                         continue;
1428                 }
1429                 /*
1430                  * Skip over a vnodes marked VSWAP.
1431                  */
1432                 if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
1433                         simple_unlock(&vp->v_interlock);
1434                         continue;
1435                 }
1436                 /*
1437                  * If WRITECLOSE is set, only flush out regular file
1438                  * vnodes open for writing.
1439                  */
1440                 if ((flags & WRITECLOSE) &&
1441                     (vp->v_writecount == 0 || vp->v_type != VREG)) {
1442                         simple_unlock(&vp->v_interlock);
1443                         continue;
1444                 }
1445                 /*
1446                  * With v_usecount == 0, all we need to do is clear
1447                  * out the vnode data structures and we are done.
1448                  */
1449                 if (vp->v_usecount == 0) {
1450                         simple_unlock(&mntvnode_slock);
1451                         vgonel(vp, p);
1452                         simple_lock(&mntvnode_slock);
1453                         continue;
1454                 }
1455                 /*
1456                  * If FORCECLOSE is set, forcibly close the vnode.
1457                  * For block or character devices, revert to an
1458                  * anonymous device. For all other files, just kill them.
1459                  */
1460                 if (flags & FORCECLOSE) {
1461                         simple_unlock(&mntvnode_slock);
1462                         if (vp->v_type != VBLK && vp->v_type != VCHR) {
1463                                 vgonel(vp, p);
1464                         } else {
1465                                 vclean(vp, 0, p);
1466                                 vp->v_op = spec_vnodeop_p;
1467                                 insmntque(vp, (struct mount *)0);
1468                         }
1469                         simple_lock(&mntvnode_slock);
1470                         continue;
1471                 }
1472 #if DIAGNOSTIC
1473                 if (busyprt)
1474                         vprint("vflush: busy vnode", vp);
1475 #endif
1476                 simple_unlock(&vp->v_interlock);
1477                 busy++;
1478         }
1479         simple_unlock(&mntvnode_slock);
1480         if (busy && ((flags & FORCECLOSE)==0))
1481                 return (EBUSY);
1482         return (0);
1483 }
1484
1485 /*
1486  * Disassociate the underlying file system from a vnode.
1487  * The vnode interlock is held on entry.
1488  */
1489 static void
1490 vclean(vp, flags, p)
1491         struct vnode *vp;
1492         int flags;
1493         struct proc *p;
1494 {
1495         int active;
1496         int removed = 0;
1497         int didhold;
1498
1499         /*
1500          * if the vnode is not obtained by calling getnewvnode() we
1501          * are not responsible for the cleanup. Just return.
1502          */
1503         if (!(vp->v_flag & VSTANDARD)) {
1504                 simple_unlock(&vp->v_interlock);
1505                 return;
1506         }
1507
1508         /*
1509          * Check to see if the vnode is in use.
1510          * If so we have to reference it before we clean it out
1511          * so that its count cannot fall to zero and generate a
1512          * race against ourselves to recycle it.
1513          */
1514         if (active = vp->v_usecount)
1515                 if (++vp->v_usecount <= 0)
1516                         panic("vclean: v_usecount");
1517         /*
1518          * Prevent the vnode from being recycled or
1519          * brought into use while we clean it out.
1520          */
1521         if (vp->v_flag & VXLOCK)
1522                 panic("vclean: deadlock");
1523         vp->v_flag |= VXLOCK;
1524
1525         /*
1526          * Even if the count is zero, the VOP_INACTIVE routine may still
1527          * have the object locked while it cleans it out. The VOP_LOCK
1528          * ensures that the VOP_INACTIVE routine is done with its work.
1529          * For active vnodes, it ensures that no other activity can
1530          * occur while the underlying object is being cleaned out.
1531          */
1532         VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1533
1534         /*
1535          * if this vnode is on the inactive list
1536          * take it off the list.
1537          */
1538         if ((active == 1) &&
1539                 (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))) {
1540                 simple_lock(&vnode_free_list_slock);
1541                 VREMINACTIVE("vclean", vp);
1542                 simple_unlock(&vnode_free_list_slock);
1543                 removed++;
1544         }
1545
1546         /* Clean the pages in VM. */
1547         if (active && (flags & DOCLOSE))
1548                 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1549
1550         /* Clean the pages in VM. */
1551         didhold = ubc_hold(vp);
1552         if ((active) && (didhold))
1553                 (void)ubc_clean(vp, 0); /* do not invalidate */
1554
1555         /*
1556          * Clean out any buffers associated with the vnode.
1557          */
1558         if (flags & DOCLOSE) {
1559                 if (vp->v_tag == VT_NFS)
1560             nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
1561         else
1562             vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1563     }
1564
1565         if (active)
1566                 VOP_INACTIVE(vp, p);
1567         else
1568                 VOP_UNLOCK(vp, 0, p);
1569
1570         /* Destroy ubc named reference */
1571     if (didhold) {
1572         ubc_rele(vp);
1573                 ubc_destroy_named(vp);
1574         }
1575
1576         /*
1577          * Reclaim the vnode.
1578          */
1579         if (VOP_RECLAIM(vp, p))
1580                 panic("vclean: cannot reclaim");
1581         cache_purge(vp);
1582         if (vp->v_vnlock) {
1583                 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1584                         vprint("vclean: lock not drained", vp);
1585                 FREE_ZONE(vp->v_vnlock, sizeof (struct lock__bsd__), M_VNODE);
1586                 vp->v_vnlock = NULL;
1587         }
1588
1589         /* It's dead, Jim! */
1590         vp->v_op = dead_vnodeop_p;
1591         vp->v_tag = VT_NON;
1592
1593         /*
1594          * Done with purge, notify sleepers of the grim news.
1595          */
1596         vp->v_flag &= ~VXLOCK;
1597         if (vp->v_flag & VXWANT) {
1598                 vp->v_flag &= ~VXWANT;
1599                 wakeup((caddr_t)vp);
1600         }
1601
1602         if (active)
1603                 vrele(vp);
1604 }
1605
1606 /*
1607  * Eliminate all activity associated with  the requested vnode
1608  * and with all vnodes aliased to the requested vnode.
1609  */
1610 int
1611 vop_revoke(ap)
1612         struct vop_revoke_args /* {
1613                 struct vnode *a_vp;
1614                 int a_flags;
1615         } */ *ap;
1616 {
1617         struct vnode *vp, *vq;
1618         struct proc *p = current_proc();
1619
1620 #if DIAGNOSTIC
1621         if ((ap->a_flags & REVOKEALL) == 0)
1622                 panic("vop_revoke");
1623 #endif
1624
1625         vp = ap->a_vp;
1626         simple_lock(&vp->v_interlock);
1627
1628         if (vp->v_flag & VALIASED) {
1629                 /*
1630                  * If a vgone (or vclean) is already in progress,
1631                  * wait until it is done and return.
1632                  */
1633                 if (vp->v_flag & VXLOCK) {
1634                         while (vp->v_flag & VXLOCK) {
1635                                 vp->v_flag |= VXWANT;
1636                                 simple_unlock(&vp->v_interlock);
1637                                 (void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1638                         }
1639                         return (0);
1640                 }
1641                 /*
1642                  * Ensure that vp will not be vgone'd while we
1643                  * are eliminating its aliases.
1644                  */
1645                 vp->v_flag |= VXLOCK;
1646                 simple_unlock(&vp->v_interlock);
1647                 while (vp->v_flag & VALIASED) {
1648                         simple_lock(&spechash_slock);
1649                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1650                                 if (vq->v_rdev != vp->v_rdev ||
1651                                     vq->v_type != vp->v_type || vp == vq)
1652                                         continue;
1653                                 simple_unlock(&spechash_slock);
1654                                 vgone(vq);
1655                                 break;
1656                         }
1657                         if (vq == NULLVP)
1658                                 simple_unlock(&spechash_slock);
1659                 }
1660                 /*
1661                  * Remove the lock so that vgone below will
1662                  * really eliminate the vnode after which time
1663                  * vgone will awaken any sleepers.
1664                  */
1665                 simple_lock(&vp->v_interlock);
1666                 vp->v_flag &= ~VXLOCK;
1667         }
1668         vgonel(vp, p);
1669         return (0);
1670 }
1671
1672 /*
1673  * Recycle an unused vnode to the front of the free list.
1674  * Release the passed interlock if the vnode will be recycled.
1675  */
1676 int
1677 vrecycle(vp, inter_lkp, p)
1678         struct vnode *vp;
1679         struct slock *inter_lkp;
1680         struct proc *p;
1681 {
1682
1683         simple_lock(&vp->v_interlock);
1684         if (vp->v_usecount == 0) {
1685                 if (inter_lkp)
1686                         simple_unlock(inter_lkp);
1687                 vgonel(vp, p);
1688                 return (1);
1689         }
1690         simple_unlock(&vp->v_interlock);
1691         return (0);
1692 }
1693
1694 /*
1695  * Eliminate all activity associated with a vnode
1696  * in preparation for reuse.
1697  */
1698 void
1699 vgone(vp)
1700         struct vnode *vp;
1701 {
1702         struct proc *p = current_proc();
1703
1704         simple_lock(&vp->v_interlock);
1705         vgonel(vp, p);
1706 }
1707
1708 /*
1709  * vgone, with the vp interlock held.
1710  */
1711 void
1712 vgonel(vp, p)
1713         struct vnode *vp;
1714         struct proc *p;
1715 {
1716         struct vnode *vq;
1717         struct vnode *vx;
1718
1719         /*
1720          * if the vnode is not obtained by calling getnewvnode() we
1721          * are not responsible for the cleanup. Just return.
1722          */
1723         if (!(vp->v_flag & VSTANDARD)) {
1724                 simple_unlock(&vp->v_interlock);
1725                 return;
1726         }
1727
1728         /*
1729          * If a vgone (or vclean) is already in progress,
1730          * wait until it is done and return.
1731          */
1732         if (vp->v_flag & VXLOCK) {
1733                 while (vp->v_flag & VXLOCK) {
1734                         vp->v_flag |= VXWANT;
1735                         simple_unlock(&vp->v_interlock);
1736                         (void)tsleep((caddr_t)vp, PINOD, "vgone", 0);
1737                 }
1738                 return;
1739         }
1740         /*
1741          * Clean out the filesystem specific data.
1742          */
1743         vclean(vp, DOCLOSE, p);
1744         /*
1745          * Delete from old mount point vnode list, if on one.
1746          */
1747         if (vp->v_mount != NULL)
1748                 insmntque(vp, (struct mount *)0);
1749         /*
1750          * If special device, remove it from special device alias list
1751          * if it is on one.
1752          */
1753         if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1754                 simple_lock(&spechash_slock);
1755                 if (*vp->v_hashchain == vp) {
1756                         *vp->v_hashchain = vp->v_specnext;
1757                 } else {
1758                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1759                                 if (vq->v_specnext != vp)
1760                                         continue;
1761                                 vq->v_specnext = vp->v_specnext;
1762                                 break;
1763                         }
1764                         if (vq == NULL)
1765                                 panic("missing bdev");
1766                 }
1767                 if (vp->v_flag & VALIASED) {
1768                         vx = NULL;
1769                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1770                                 if (vq->v_rdev != vp->v_rdev ||
1771                                     vq->v_type != vp->v_type)
1772                                         continue;
1773                                 if (vx)
1774                                         break;
1775                                 vx = vq;
1776                         }
1777                         if (vx == NULL)
1778                                 panic("missing alias");
1779                         if (vq == NULL)
1780                                 vx->v_flag &= ~VALIASED;
1781                         vp->v_flag &= ~VALIASED;
1782                 }
1783                 simple_unlock(&spechash_slock);
1784                 FREE_ZONE(vp->v_specinfo, sizeof (struct specinfo), M_VNODE);
1785                 vp->v_specinfo = NULL;
1786         }
1787         /*
1788          * If it is on the freelist and not already at the head,
1789          * move it to the head of the list. The test of the back
1790          * pointer and the reference count of zero is because
1791          * it will be removed from the free list by getnewvnode,
1792          * but will not have its reference count incremented until
1793          * after calling vgone. If the reference count were
1794          * incremented first, vgone would (incorrectly) try to
1795          * close the previous instance of the underlying object.
1796          * So, the back pointer is explicitly set to `0xdeadb' in
1797          * getnewvnode after removing it from the freelist to ensure
1798          * that we do not try to move it here.
1799          */
1800         if (vp->v_usecount == 0) {
1801                 simple_lock(&vnode_free_list_slock);
1802                 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1803                     vnode_free_list.tqh_first != vp) {
1804                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1805                         TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1806                 }
1807                 simple_unlock(&vnode_free_list_slock);
1808         }
1809         vp->v_type = VBAD;
1810 }
1811
1812 /*
1813  * Lookup a vnode by device number.
1814  */
1815 int
1816 vfinddev(dev, type, vpp)
1817         dev_t dev;
1818         enum vtype type;
1819         struct vnode **vpp;
1820 {
1821         struct vnode *vp;
1822         int rc = 0;
1823
1824         simple_lock(&spechash_slock);
1825         for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1826                 if (dev != vp->v_rdev || type != vp->v_type)
1827                         continue;
1828                 *vpp = vp;
1829                 rc = 1;
1830                 break;
1831         }
1832         simple_unlock(&spechash_slock);
1833         return (rc);
1834 }
1835
1836 /*
1837  * Calculate the total number of references to a special device.
1838  */
1839 int
1840 vcount(vp)
1841         struct vnode *vp;
1842 {
1843         struct vnode *vq, *vnext;
1844         int count;
1845
1846 loop:
1847         if ((vp->v_flag & VALIASED) == 0)
1848                 return (vp->v_usecount);
1849         simple_lock(&spechash_slock);
1850         for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1851                 vnext = vq->v_specnext;
1852                 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1853                         continue;
1854                 /*
1855                  * Alias, but not in use, so flush it out.
1856                  */
1857                 if (vq->v_usecount == 0 && vq != vp) {
1858                         simple_unlock(&spechash_slock);
1859                         vgone(vq);
1860                         goto loop;
1861                 }
1862                 count += vq->v_usecount;
1863         }
1864         simple_unlock(&spechash_slock);
1865         return (count);
1866 }
1867
1868 int     prtactive = 0;          /* 1 => print out reclaim of active vnodes */
1869
1870 /*
1871  * Print out a description of a vnode.
1872  */
1873 static char *typename[] =
1874    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1875
1876 void
1877 vprint(label, vp)
1878         char *label;
1879         register struct vnode *vp;
1880 {
1881         char buf[64];
1882
1883         if (label != NULL)
1884                 printf("%s: ", label);
1885         printf("type %s, usecount %d, writecount %d, refcount %d,",
1886                 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1887                 vp->v_holdcnt);
1888         buf[0] = '\0';
1889         if (vp->v_flag & VROOT)
1890                 strcat(buf, "|VROOT");
1891         if (vp->v_flag & VTEXT)
1892                 strcat(buf, "|VTEXT");
1893         if (vp->v_flag & VSYSTEM)
1894                 strcat(buf, "|VSYSTEM");
1895         if (vp->v_flag & VNOFLUSH)
1896                 strcat(buf, "|VNOFLUSH");
1897         if (vp->v_flag & VXLOCK)
1898                 strcat(buf, "|VXLOCK");
1899         if (vp->v_flag & VXWANT)
1900                 strcat(buf, "|VXWANT");
1901         if (vp->v_flag & VBWAIT)
1902                 strcat(buf, "|VBWAIT");
1903         if (vp->v_flag & VALIASED)
1904                 strcat(buf, "|VALIASED");
1905         if (buf[0] != '\0')
1906                 printf(" flags (%s)", &buf[1]);
1907         if (vp->v_data == NULL) {
1908                 printf("\n");
1909         } else {
1910                 printf("\n\t");
1911                 VOP_PRINT(vp);
1912         }
1913 }
1914
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 *
 * Every mount that can be busied without waiting is scanned and each
 * vnode on it for which VOP_ISLOCKED() is true is printed with vprint().
 */
void
printlockedvnodes()
{
        struct proc *p = current_proc();
        struct mount *mp, *nmp;
        struct vnode *vp;

        printf("Locked vnodes\n");
        simple_lock(&mountlist_slock);
        mp = mountlist.cqh_first;
        while (mp != (void *)&mountlist) {
                if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p) == 0) {
                        vp = mp->mnt_vnodelist.lh_first;
                        while (vp != NULL) {
                                if (VOP_ISLOCKED(vp))
                                        vprint((char *)0, vp);
                                vp = vp->v_mntvnodes.le_next;
                        }
                        /* Re-take the list lock before moving on. */
                        simple_lock(&mountlist_slock);
                        nmp = mp->mnt_list.cqe_next;
                        vfs_unbusy(mp, p);
                } else {
                        /* Could not busy this mount: skip it. */
                        nmp = mp->mnt_list.cqe_next;
                }
                mp = nmp;
        }
        simple_unlock(&mountlist_slock);
}
#endif
1947
1948 /*
1949  * Top level filesystem related information gathering.
1950  */
1951 int
1952 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1953         int *name;
1954         u_int namelen;
1955         void *oldp;
1956         size_t *oldlenp;
1957         void *newp;
1958         size_t newlen;
1959         struct proc *p;
1960 {
1961         struct vfsconf *vfsp;
1962
1963         /*
1964          * The VFS_NUMMNTOPS shouldn't be at name[0] since
1965          * is a VFS generic variable. So now we must check
1966          * namelen so we don't end up covering any UFS
1967          * variables (sinc UFS vfc_typenum is 1).
1968          *
1969          * It should have been:
1970          *    name[0]:  VFS_GENERIC
1971          *    name[1]:  VFS_NUMMNTOPS
1972          */
1973         if (namelen == 1 && name[0] == VFS_NUMMNTOPS) {
1974                 extern unsigned int vfs_nummntops;
1975                 return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
1976         }
1977
1978         /* all sysctl names at this level are at least name and field */
1979         if (namelen < 2)
1980                 return (ENOTDIR);               /* overloaded */
1981         if (name[0] != VFS_GENERIC) {
1982                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1983                         if (vfsp->vfc_typenum == name[0])
1984                                 break;
1985                 if (vfsp == NULL)
1986                         return (EOPNOTSUPP);
1987                 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1988                     oldp, oldlenp, newp, newlen, p));
1989         }
1990         switch (name[1]) {
1991         case VFS_MAXTYPENUM:
1992                 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
1993         case VFS_CONF:
1994                 if (namelen < 3)
1995                         return (ENOTDIR);       /* overloaded */
1996                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1997                         if (vfsp->vfc_typenum == name[2])
1998                                 break;
1999                 if (vfsp == NULL)
2000                         return (EOPNOTSUPP);
2001                 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
2002                     sizeof(struct vfsconf)));
2003         }
2004         return (EOPNOTSUPP);
2005 }
2006
2007 int kinfo_vdebug = 1;
2008 #define KINFO_VNODESLOP 10
2009 /*
2010  * Dump vnode list (via sysctl).
2011  * Copyout address of vnode followed by vnode.
2012  */
2013 /* ARGSUSED */
2014 int
2015 sysctl_vnode(where, sizep, p)
2016         char *where;
2017         size_t *sizep;
2018         struct proc *p;
2019 {
2020         struct mount *mp, *nmp;
2021         struct vnode *nvp, *vp;
2022         char *bp = where, *savebp;
2023         char *ewhere;
2024         int error;
2025
2026 #define VPTRSZ  sizeof (struct vnode *)
2027 #define VNODESZ sizeof (struct vnode)
2028         if (where == NULL) {
2029                 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2030                 return (0);
2031         }
2032         ewhere = where + *sizep;
2033
2034         simple_lock(&mountlist_slock);
2035         for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2036                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2037                         nmp = mp->mnt_list.cqe_next;
2038                         continue;
2039                 }
2040                 savebp = bp;
2041 again:
2042                 simple_lock(&mntvnode_slock);
2043                 for (vp = mp->mnt_vnodelist.lh_first;
2044                      vp != NULL;
2045                      vp = nvp) {
2046                         /*
2047                          * Check that the vp is still associated with
2048                          * this filesystem.  RACE: could have been
2049                          * recycled onto the same filesystem.
2050                          */
2051                         if (vp->v_mount != mp) {
2052                                 simple_unlock(&mntvnode_slock);
2053                                 if (kinfo_vdebug)
2054                                         printf("kinfo: vp changed\n");
2055                                 bp = savebp;
2056                                 goto again;
2057                         }
2058                         nvp = vp->v_mntvnodes.le_next;
2059                         if (bp + VPTRSZ + VNODESZ > ewhere) {
2060                                 simple_unlock(&mntvnode_slock);
2061                                 *sizep = bp - where;
2062                                 return (ENOMEM);
2063                         }
2064                         simple_unlock(&mntvnode_slock);
2065                         if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2066                            (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2067                                 return (error);
2068                         bp += VPTRSZ + VNODESZ;
2069                         simple_lock(&mntvnode_slock);
2070                 }
2071                 simple_unlock(&mntvnode_slock);
2072                 simple_lock(&mountlist_slock);
2073                 nmp = mp->mnt_list.cqe_next;
2074                 vfs_unbusy(mp, p);
2075         }
2076         simple_unlock(&mountlist_slock);
2077
2078         *sizep = bp - where;
2079         return (0);
2080 }
2081
2082 /*
2083  * Check to see if a filesystem is mounted on a block device.
2084  */
2085 int
2086 vfs_mountedon(vp)
2087         struct vnode *vp;
2088 {
2089         struct vnode *vq;
2090         int error = 0;
2091
2092         if (vp->v_specflags & SI_MOUNTEDON)
2093                 return (EBUSY);
2094         if (vp->v_flag & VALIASED) {
2095                 simple_lock(&spechash_slock);
2096                 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2097                         if (vq->v_rdev != vp->v_rdev ||
2098                             vq->v_type != vp->v_type)
2099                                 continue;
2100                         if (vq->v_specflags & SI_MOUNTEDON) {
2101                                 error = EBUSY;
2102                                 break;
2103                         }
2104                 }
2105                 simple_unlock(&spechash_slock);
2106         }
2107         return (error);
2108 }
2109
2110 /*
2111  * Unmount all filesystems. The list is traversed in reverse order
2112  * of mounting to avoid dependencies.
2113  */
2114 __private_extern__ void
2115 vfs_unmountall()
2116 {
2117         struct mount *mp, *nmp;
2118         struct proc *p = current_proc();
2119
2120         /*
2121          * Since this only runs when rebooting, it is not interlocked.
2122          */
2123         for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2124                 nmp = mp->mnt_list.cqe_prev;
2125                 (void) dounmount(mp, MNT_FORCE, p);
2126         }
2127 }
2128
2129 /*
2130  * Build hash lists of net addresses and hang them off the mount point.
2131  * Called by vfs_export() to set up the lists of export addresses.
2132  */
2133 static int
2134 vfs_hang_addrlist(mp, nep, argp)
2135         struct mount *mp;
2136         struct netexport *nep;
2137         struct export_args *argp;
2138 {
2139         register struct netcred *np;
2140         register struct radix_node_head *rnh;
2141         register int i;
2142         struct radix_node *rn;
2143         struct sockaddr *saddr, *smask = 0;
2144         struct domain *dom;
2145         int error;
2146
2147         if (argp->ex_addrlen == 0) {
2148                 if (mp->mnt_flag & MNT_DEFEXPORTED)
2149                         return (EPERM);
2150                 np = &nep->ne_defexported;
2151                 np->netc_exflags = argp->ex_flags;
2152                 np->netc_anon = argp->ex_anon;
2153                 np->netc_anon.cr_ref = 1;
2154                 mp->mnt_flag |= MNT_DEFEXPORTED;
2155                 return (0);
2156         }
2157         i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2158         MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK);
2159         bzero((caddr_t)np, i);
2160         saddr = (struct sockaddr *)(np + 1);
2161         if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
2162                 goto out;
2163         if (saddr->sa_len > argp->ex_addrlen)
2164                 saddr->sa_len = argp->ex_addrlen;
2165         if (argp->ex_masklen) {
2166                 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2167                 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
2168                 if (error)
2169                         goto out;
2170                 if (smask->sa_len > argp->ex_masklen)
2171                         smask->sa_len = argp->ex_masklen;
2172         }
2173         i = saddr->sa_family;
2174         if ((rnh = nep->ne_rtable[i]) == 0) {
2175                 /*
2176                  * Seems silly to initialize every AF when most are not
2177                  * used, do so on demand here
2178                  */
2179                 for (dom = domains; dom; dom = dom->dom_next)
2180                         if (dom->dom_family == i && dom->dom_rtattach) {
2181                                 dom->dom_rtattach((void **)&nep->ne_rtable[i],
2182                                         dom->dom_rtoffset);
2183                                 break;
2184                         }
2185                 if ((rnh = nep->ne_rtable[i]) == 0) {
2186                         error = ENOBUFS;
2187                         goto out;
2188                 }
2189         }
2190         rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
2191                 np->netc_rnodes);
2192         if (rn == 0) {
2193                 /*
2194                  * One of the reasons that rnh_addaddr may fail is that
2195                  * the entry already exists. To check for this case, we
2196                  * look up the entry to see if it is there. If so, we
2197                  * do not need to make a new entry but do return success.
2198                  */
2199                 _FREE(np, M_NETADDR);
2200                 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
2201                 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
2202                     ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
2203                     !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
2204                             (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
2205                         return (0);
2206                 return (EPERM);
2207         }
2208         np->netc_exflags = argp->ex_flags;
2209         np->netc_anon = argp->ex_anon;
2210         np->netc_anon.cr_ref = 1;
2211         return (0);
2212 out:
2213         _FREE(np, M_NETADDR);
2214         return (error);
2215 }
2216
2217 /* ARGSUSED */
2218 static int
2219 vfs_free_netcred(rn, w)
2220         struct radix_node *rn;
2221         caddr_t w;
2222 {
2223         register struct radix_node_head *rnh = (struct radix_node_head *)w;
2224
2225         (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2226         _FREE((caddr_t)rn, M_NETADDR);
2227         return (0);
2228 }
2229
2230 /*
2231  * Free the net address hash lists that are hanging off the mount points.
2232  */
2233 static void
2234 vfs_free_addrlist(nep)
2235         struct netexport *nep;
2236 {
2237         register int i;
2238         register struct radix_node_head *rnh;
2239
2240         for (i = 0; i <= AF_MAX; i++)
2241                 if (rnh = nep->ne_rtable[i]) {
2242                         (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
2243                             (caddr_t)rnh);
2244                         _FREE((caddr_t)rnh, M_RTABLE);
2245                         nep->ne_rtable[i] = 0;
2246                 }
2247 }
2248
2249 int
2250 vfs_export(mp, nep, argp)
2251         struct mount *mp;
2252         struct netexport *nep;
2253         struct export_args *argp;
2254 {
2255         int error;
2256
2257         if (argp->ex_flags & MNT_DELEXPORT) {
2258                 vfs_free_addrlist(nep);
2259                 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2260         }
2261         if (argp->ex_flags & MNT_EXPORTED) {
2262                 if (error = vfs_hang_addrlist(mp, nep, argp))
2263                         return (error);
2264                 mp->mnt_flag |= MNT_EXPORTED;
2265         }
2266         return (0);
2267 }
2268
2269 struct netcred *
2270 vfs_export_lookup(mp, nep, nam)
2271         register struct mount *mp;
2272         struct netexport *nep;
2273         struct mbuf *nam;
2274 {
2275         register struct netcred *np;
2276         register struct radix_node_head *rnh;
2277         struct sockaddr *saddr;
2278
2279         np = NULL;
2280         if (mp->mnt_flag & MNT_EXPORTED) {
2281                 /*
2282                  * Lookup in the export list first.
2283                  */
2284                 if (nam != NULL) {
2285                         saddr = mtod(nam, struct sockaddr *);
2286                         rnh = nep->ne_rtable[saddr->sa_family];
2287                         if (rnh != NULL) {
2288                                 np = (struct netcred *)
2289                                         (*rnh->rnh_matchaddr)((caddr_t)saddr,
2290                                                               rnh);
2291                                 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2292                                         np = NULL;
2293                         }
2294                 }
2295                 /*
2296                  * If no address match, use the default if it exists.
2297                  */
2298                 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2299                         np = &nep->ne_defexported;
2300         }
2301         return (np);
2302 }
2303
/*
 * Try to reclaim vnodes from the memory object cache.
 *
 * `count' is passed by reference to vnode_pager_release_from_cache();
 * whatever value that routine leaves in the counter is returned.
 */
static int
vm_object_cache_reclaim(int count)
{
        void vnode_pager_release_from_cache(int *);
        int result = count;

        /* attempt to reclaim vnodes from VM object cache */
        vnode_pager_release_from_cache(&result);
        return (result);
}
2319
/*
 * Release memory object reference held by inactive vnodes
 * and then try to reclaim some vnodes from the memory
 * object cache
 *
 * count: upper bound on the number of inactive-list vnodes accounted
 *        for in this call (tracked in i), and the value handed to
 *        vm_object_cache_reclaim() afterwards.
 *
 * Returns the value produced by vm_object_cache_reclaim(count).  The
 * number of inactive vnodes handled is only added to
 * vnode_reclaim_tried; it is not part of the return value.
 */
static int
vnreclaim(int count)
{
        int i, loopcnt;
        struct vnode *vp;
        int err;
        struct proc *p;

        i = 0;          /* inactive vnodes accounted for so far */
        loopcnt = 0;    /* number of passes through `restart' */

        /* Try to release "count" vnodes from the inactive list */
restart:
        if (++loopcnt > inactivevnodes) {
                /*
                 * I did my best trying to reclaim the vnodes.
                 * Do not try any more as that would only lead to
                 * long latencies. Also in the worst case
                 * this can get totally CPU bound.
                 * Just fall though and attempt a reclaim of VM
                 * object cache
                 */
                /* Note: this path skips the vnode_reclaim_tried update. */
                goto out;
        }

        simple_lock(&vnode_free_list_slock);
        for (vp = TAILQ_FIRST(&vnode_inactive_list);
                        (vp != NULLVP) && (i < count);
                        vp = TAILQ_NEXT(vp, v_freelist)) {

                /* Interlock busy: skip this vnode rather than spin. */
                if (!simple_lock_try(&vp->v_interlock))
                        continue;

                if (vp->v_usecount != 1)
                        panic("vnreclaim: v_usecount");

                if(!UBCINFOEXISTS(vp)) {
                        if (vp->v_type == VBAD) {
                                /*
                                 * NOTE(review): the loop step reads vp's
                                 * v_freelist link after VREMINACTIVE;
                                 * presumably the macro leaves the forward
                                 * link intact -- confirm.
                                 */
                                VREMINACTIVE("vnreclaim", vp);
                                simple_unlock(&vp->v_interlock);
                                continue;
                        } else
                                panic("non UBC vnode on inactive list");
                                /* Should not reach here */
                }

                /* If vnode is already being reclaimed, wait */
                if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
                        vp->v_flag |= VXWANT;
                        /* Both locks are dropped before sleeping. */
                        simple_unlock(&vp->v_interlock);
                        simple_unlock(&vnode_free_list_slock);
                        (void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
                        goto restart;
                }

                VREMINACTIVE("vnreclaim", vp);
                /*
                 * From here on the list lock is no longer held, so every
                 * path below ends in `goto restart' instead of continuing
                 * the scan.
                 */
                simple_unlock(&vnode_free_list_slock);

                if (ubc_issetflags(vp, UI_WASMAPPED)) {
                        /*
                         * We should not reclaim as it is likely
                         * to be in use. Let it die a natural death.
                         * Release the UBC reference if one exists
                         * and put it back at the tail.
                         */
                        simple_unlock(&vp->v_interlock);
                        if (ubc_release_named(vp)) {
                                if (UBCINFOEXISTS(vp)) {
                                        simple_lock(&vp->v_interlock);
                                        if (vp->v_usecount == 1 && !VONLIST(vp))
                                                vinactive(vp);
                                        simple_unlock(&vp->v_interlock);
                                }
                        } else {
                            simple_lock(&vp->v_interlock);
                                vinactive(vp);
                                simple_unlock(&vp->v_interlock);
                        }
                } else {
                        int didhold;

                        VORECLAIM_ENABLE(vp);

                        /*
                         * scrub the dirty pages and invalidate the buffers
                         */
                        p = current_proc();
                        /* LK_INTERLOCK: the interlock is handed to vn_lock. */
                        err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
                        if (err) {
                                /* cannot reclaim */
                                simple_lock(&vp->v_interlock);
                                vinactive(vp);
                                VORECLAIM_DISABLE(vp);
                                /* Counted in i although nothing was reclaimed. */
                                i++;
                                simple_unlock(&vp->v_interlock);
                                goto restart;
                        }

                        /* keep the vnode alive so we can kill it */
                        simple_lock(&vp->v_interlock);
                        if(vp->v_usecount != 1)
                                panic("VOCR: usecount race");
                        vp->v_usecount++;
                        simple_unlock(&vp->v_interlock);

                        /* clean up the state in VM without invalidating */
                        didhold = ubc_hold(vp);
                        if (didhold)
                                (void)ubc_clean(vp, 0);

                        /* flush and invalidate buffers associated with the vnode */
                        if (vp->v_tag == VT_NFS)
                                nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
                        else
                                vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

                        /*
                         * Note: for the v_usecount == 2 case, VOP_INACTIVE
                         * has not yet been called.  Call it now while vp is
                         * still locked, it will also release the lock.
                         */
                        if (vp->v_usecount == 2)
                                VOP_INACTIVE(vp, p);
                        else
                                VOP_UNLOCK(vp, 0, p);

                        /* Balance the ubc_hold() above, if it succeeded. */
                        if (didhold)
                                ubc_rele(vp);

                        /*
                         * destroy the ubc named reference.
                         * If we can't because it is held for I/Os
                         * in progress, just put it back on the inactive
                         * list and move on.  Otherwise, the paging reference
                         * is toast (and so is this vnode?).
                         */
                        if (ubc_destroy_named(vp)) {
                            i++;
                        }
                        simple_lock(&vp->v_interlock);
                        VORECLAIM_DISABLE(vp);
                        simple_unlock(&vp->v_interlock);
                        vrele(vp);  /* release extra use we added here */
                }
                /* inactive list lock was released, must restart */
                goto restart;
        }
        /* Scan ended with the list lock still held. */
        simple_unlock(&vnode_free_list_slock);

        vnode_reclaim_tried += i;
out:
        /* i is reused here: it now holds the object-cache result. */
        i = vm_object_cache_reclaim(count);
        vnode_objects_reclaimed += i;

        return(i);
}
2481
/*
 * Drop the vnode pager's reference on a vnode and detach its ubc_info.
 *
 * This routine is called from vnode_pager_no_senders(),
 * which in turn can be called with the vnode locked by vnode_uncache().
 * But it could also get called as a result of vm_object_cache_trim().
 * In that case the lock state is unknown.
 * AGE the vnode so that it gets recycled quickly.
 *
 * The whole routine runs under kernel_flock: the caller's funnel state
 * is saved on entry and restored on every return path.
 * Panics if vp is NULL.
 *
 * NOTE(review): the original header said "Check lock status to decide
 * whether to call vput() or vrele()", but the body below only ever
 * calls vrele() -- the vput() mention looks stale; confirm.
 */
__private_extern__ void
vnode_pager_vrele(struct vnode *vp)
{

        boolean_t       funnel_state;   /* funnel state to restore on exit */
        int isvnreclaim = 1;            /* cleared when VORECLAIM was not already set on entry */

        if (vp == (struct vnode *) NULL)
                panic("vnode_pager_vrele: null vp");

        funnel_state = thread_funnel_set(kernel_flock, TRUE);

        /* Mark the vnode to be recycled */
        vagevp(vp);

        simple_lock(&vp->v_interlock);
        /*
         * If a vgone (or vclean) is already in progress,
         * Do not bother with the ubc_info cleanup.
         * Let the vclean deal with it.
         *
         * On this path we only clear VTERMINATE, wake anybody sleeping
         * on &vp->v_ubcinfo (VTERMWANT), drop our reference and return.
         */
        if (vp->v_flag & VXLOCK) {
                CLR(vp->v_flag, VTERMINATE);
                if (ISSET(vp->v_flag, VTERMWANT)) {
                        CLR(vp->v_flag, VTERMWANT);
                        wakeup((caddr_t)&vp->v_ubcinfo);
                }
                simple_unlock(&vp->v_interlock);
                vrele(vp);
                (void) thread_funnel_set(kernel_flock, funnel_state);
                return;
        }

        /* It's dead, Jim! */
        if (!ISSET(vp->v_flag, VORECLAIM)) {
                /*
                 * called as a result of eviction of the memory
                 * object from the memory object cache
                 */
                isvnreclaim = 0;

                /*
                 * So serialize vnode operations.
                 * Since we enabled VORECLAIM here, we are also the one
                 * who disables it again at the bottom of this routine.
                 */
                VORECLAIM_ENABLE(vp);
        }
        if (!ISSET(vp->v_flag, VTERMINATE))
                SET(vp->v_flag, VTERMINATE);
        if (UBCINFOEXISTS(vp)) {
                struct ubc_info *uip = vp->v_ubcinfo;

                /* Carry the "was mapped" state over to the vnode before the ubc_info goes away */
                if (ubc_issetflags(vp, UI_WASMAPPED))
                        SET(vp->v_flag, VWASMAPPED);

                /*
                 * Detach the ubc_info while the interlock is still held,
                 * then deallocate it only after the interlock is dropped.
                 */
                vp->v_ubcinfo = UBC_NOINFO;  /* catch bad accesses */
                simple_unlock(&vp->v_interlock);
                ubc_info_deallocate(uip);
        } else {
                /*
                 * UBCINFOEXISTS() was false; a VBAD vnode that still
                 * carries a real ubc_info pointer (neither UBC_INFO_NULL
                 * nor UBC_NOINFO) gets the same detach-then-deallocate
                 * treatment as above.
                 */
                if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
                        && ((vp)->v_ubcinfo != UBC_NOINFO)) {
                        struct ubc_info *uip = vp->v_ubcinfo;

                        vp->v_ubcinfo = UBC_NOINFO;  /* catch bad accesses */
                        simple_unlock(&vp->v_interlock);
                        ubc_info_deallocate(uip);
                } else {
                        simple_unlock(&vp->v_interlock);
                }
        }

        /*
         * NOTE(review): every path above has already dropped
         * v_interlock, so v_flag is modified from here on without it --
         * presumably the funnel is the serialization; confirm.
         */
        CLR(vp->v_flag, VTERMINATE);

        if (vp->v_type != VBAD){
                vgone(vp);      /* revoke the vnode */
                vrele(vp);      /* and drop the reference */
        } else
                vrele(vp);

        /*
         * NOTE(review): vp is still dereferenced below, after the
         * vrele() above released our reference -- confirm this is safe.
         */
        if (ISSET(vp->v_flag, VTERMWANT)) {
                CLR(vp->v_flag, VTERMWANT);
                wakeup((caddr_t)&vp->v_ubcinfo);
        }
        /* Undo the VORECLAIM_ENABLE() done above, if it was ours */
        if (!isvnreclaim)
                VORECLAIM_DISABLE(vp);
        (void) thread_funnel_set(kernel_flock, funnel_state);
        return;
}
2575
2576
#if DIAGNOSTIC
/* When non-zero, walk_allvnodes() prints every vnode with a negative use count. */
int walk_vnodes_debug = 0;

/*
 * Follow a chain of vnodes linked through v_freelist, starting at
 * `first', reporting negative use counts when walk_vnodes_debug is set.
 * Returns the number of vnodes on the chain.
 */
static int
walk_freelist_chain(struct vnode *first)
{
        struct vnode *vp = first;
        int seen = 0;

        while (vp != NULLVP) {
                if ((vp->v_usecount < 0) && walk_vnodes_debug)
                        printf("vp is %x\n",vp);
                seen++;
                vp = vp->v_freelist.tqe_next;
        }
        return (seen);
}

/*
 * Diagnostic sweep: visit every vnode hanging off every mount on
 * mountlist, then every vnode on the free list and on the inactive
 * list.  Vnodes whose v_usecount is negative are printed when
 * walk_vnodes_debug is set; the length of the free and inactive lists
 * is always printed.
 */
void
walk_allvnodes()
{
        struct mount *mp;
        struct vnode *vp;
        int cnt = 0;

        mp = mountlist.cqh_first;
        while (mp != (void *)&mountlist) {
                vp = mp->mnt_vnodelist.lh_first;
                while (vp != NULL) {
                        if ((vp->v_usecount < 0) && walk_vnodes_debug)
                                printf("vp is %x\n",vp);
                        vp = vp->v_mntvnodes.le_next;
                }
                mp = mp->mnt_list.cqe_next;
        }

        cnt = walk_freelist_chain(vnode_free_list.tqh_first);
        printf("%d - free\n", cnt);

        cnt = walk_freelist_chain(vnode_inactive_list.tqh_first);
        printf("%d - inactive\n", cnt);
}
#endif /* DIAGNOSTIC */
2620
2621 void
2622 vfs_io_attributes(vp, flags, iosize, vectors)
2623         struct vnode    *vp;
2624         int     flags;  /* B_READ or B_WRITE */
2625         int     *iosize;
2626         int     *vectors;
2627 {
2628         struct mount *mp;
2629
2630         /* start with "reasonable" defaults */
2631         *iosize = MAXPHYS;
2632         *vectors = 32;
2633
2634         mp = vp->v_mount;
2635         if (mp != NULL) {
2636                 switch (flags) {
2637                 case B_READ:
2638                         *iosize = mp->mnt_maxreadcnt;
2639                         *vectors = mp->mnt_segreadcnt;
2640                         break;
2641                 case B_WRITE:
2642                         *iosize = mp->mnt_maxwritecnt;
2643                         *vectors = mp->mnt_segwritecnt;
2644                         break;
2645                 default:
2646                         break;
2647                 }
2648         }
2649
2650         return;
2651 }
2652
2653 #include <dev/disk.h>
2654
2655 int
2656 vfs_init_io_attributes(devvp, mp)
2657         struct vnode *devvp;
2658         struct mount *mp;
2659 {
2660         int error;
2661         off_t readblockcnt;
2662         off_t writeblockcnt;
2663         off_t readsegcnt;
2664         off_t writesegcnt;
2665         u_long blksize;
2666
2667         u_int64_t temp;
2668
2669         struct proc *p = current_proc();
2670         struct  ucred *cred = p->p_ucred;
2671
2672         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
2673                                 (caddr_t)&readblockcnt, 0, cred, p)))
2674                 return (error);
2675
2676         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
2677                                 (caddr_t)&writeblockcnt, 0, cred, p)))
2678                 return (error);
2679
2680         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
2681                                 (caddr_t)&readsegcnt, 0, cred, p)))
2682                 return (error);
2683
2684         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
2685                                 (caddr_t)&writesegcnt, 0, cred, p)))
2686                 return (error);
2687
2688         if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
2689                                 (caddr_t)&blksize, 0, cred, p)))
2690                 return (error);
2691
2692         temp = readblockcnt * blksize;
2693         temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2694         mp->mnt_maxreadcnt = (u_int32_t)temp;
2695
2696         temp = writeblockcnt * blksize;
2697         temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2698         mp->mnt_maxwritecnt = (u_int32_t)temp;
2699
2700         temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
2701         mp->mnt_segreadcnt = (u_int16_t)temp;
2702
2703         temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
2704         mp->mnt_segwritecnt = (u_int16_t)temp;
2705
2706 #if 0
2707         printf("--- IO attributes for mount point 0x%08x ---\n", mp);
2708         printf("\tmnt_maxreadcnt = 0x%x", mp->mnt_maxreadcnt);
2709         printf("\tmnt_maxwritecnt = 0x%x\n", mp->mnt_maxwritecnt);
2710         printf("\tmnt_segreadcnt = 0x%x", mp->mnt_segreadcnt);
2711         printf("\tmnt_segwritecnt = 0x%x\n", mp->mnt_segwritecnt);
2712 #endif /* 0 */
2713
2714         return (error);
2715 }
2716