bsd/vfs/vfs_subr.c

   1 /*
   2  * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  23 /*
  24  * Copyright (c) 1989, 1993
  25  *      The Regents of the University of California.  All rights reserved.
  26  * (c) UNIX System Laboratories, Inc.
  27  * All or some portions of this file are derived from material licensed
  28  * to the University of California by American Telephone and Telegraph
  29  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  30  * the permission of UNIX System Laboratories, Inc.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed by the University of
  43  *      California, Berkeley and its contributors.
  44  * 4. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      @(#)vfs_subr.c  8.31 (Berkeley) 5/26/95
  61  */
  62
  63 /*
  64  * External virtual filesystem routines
  65  */
  66
  67 #define DIAGNOSTIC 1
  68
  69 #include <sys/param.h>
  70 #include <sys/systm.h>
  71 #include <sys/proc.h>
  72 #include <sys/mount.h>
  73 #include <sys/time.h>
  74 #include <sys/vnode.h>
  75 #include <sys/stat.h>
  76 #include <sys/namei.h>
  77 #include <sys/ucred.h>
  78 #include <sys/buf.h>
  79 #include <sys/errno.h>
  80 #include <sys/malloc.h>
  81 #include <sys/domain.h>
  82 #include <sys/mbuf.h>
  83 #include <sys/syslog.h>
  84 #include <sys/ubc.h>
  85 #include <sys/vm.h>
  86 #include <sys/sysctl.h>
  87
  88 #include <kern/assert.h>
  89
  90 #include <miscfs/specfs/specdev.h>
  91
  92 #include <mach/mach_types.h>
  93 #include <mach/memory_object_types.h>
  94
  95
  96 enum vtype iftovt_tab[16] = {
  97         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
  98         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
  99 };
 100 int     vttoif_tab[9] = {
 101         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
 102         S_IFSOCK, S_IFIFO, S_IFMT,
 103 };
 104
 105 static void vfree(struct vnode *vp);
 106 static void vinactive(struct vnode *vp);
 107 static int vnreclaim(int count);
 108 extern kern_return_t
 109         adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
 110
 111 /*
 112  * Insq/Remq for the vnode usage lists.
 113  */
 114 #define bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
 115 #define bufremvn(bp) {                                                  \
 116         LIST_REMOVE(bp, b_vnbufs);                                      \
 117         (bp)->b_vnbufs.le_next = NOLIST;                                \
 118 }
 119
 120 TAILQ_HEAD(freelst, vnode) vnode_free_list;     /* vnode free list */
 121 TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list;     /* vnode inactive list */
 122 struct mntlist mountlist;                       /* mounted filesystem list */
 123
 124 #if DIAGNOSTIC
 125 #define VLISTCHECK(fun, vp, list)       \
 126         if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
 127                 panic("%s: %s vnode not on %slist", (fun), (list), (list));
 128
 129 #define VINACTIVECHECK(fun, vp, expected)       \
 130         do {    \
 131                 int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE);    \
 132                 if (__is_inactive ^ expected)   \
 133                         panic("%s: %sinactive vnode, expected %s", (fun),       \
 134                                 __is_inactive? "" : "not ",     \
 135                                 expected? "inactive": "not inactive"); \
 136         } while(0)
 137 #else
 138 #define VLISTCHECK(fun, vp, list)
 139 #define VINACTIVECHECK(fun, vp, expected)
 140 #endif /* DIAGNOSTIC */
 141
 142 #define VLISTNONE(vp)   \
 143         do {    \
 144                 (vp)->v_freelist.tqe_next = (struct vnode *)0;  \
 145                 (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb;   \
 146         } while(0)
 147
 148 #define VONLIST(vp)     \
 149         ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
 150
 151 /* remove a vnode from free vnode list */
 152 #define VREMFREE(fun, vp)       \
 153         do {    \
 154                 VLISTCHECK((fun), (vp), "free");        \
 155                 TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist);       \
 156                 VLISTNONE((vp));        \
 157                 freevnodes--;   \
 158         } while(0)
 159
 160 /* remove a vnode from inactive vnode list */
 161 #define VREMINACTIVE(fun, vp)   \
 162         do {    \
 163                 VLISTCHECK((fun), (vp), "inactive"); \
 164                 VINACTIVECHECK((fun), (vp), VUINACTIVE); \
 165                 TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \
 166                 CLR((vp)->v_flag, VUINACTIVE); \
 167                 VLISTNONE((vp));        \
 168                 inactivevnodes--;       \
 169         } while(0)
 170
 171 #define VORECLAIM_ENABLE(vp)   \
 172         do {    \
 173                 if (ISSET((vp)->v_flag, VORECLAIM))     \
 174                         panic("vm object raclaim already");     \
 175                 SET((vp)->v_flag, VORECLAIM);   \
 176         } while(0)
 177
 178 #define VORECLAIM_DISABLE(vp)   \
 179         do {    \
 180                 CLR((vp)->v_flag, VORECLAIM);   \
 181                 if (ISSET((vp)->v_flag, VXWANT)) {      \
 182                         CLR((vp)->v_flag, VXWANT);      \
 183                         wakeup((caddr_t)(vp));  \
 184                 }       \
 185         } while(0)
 186
 187 /*
 188  * Have to declare first two locks as actual data even if !MACH_SLOCKS, since
 189  * a pointers to them get passed around.
 190  */
 191 simple_lock_data_t mountlist_slock;
 192 simple_lock_data_t mntvnode_slock;
 193 decl_simple_lock_data(,mntid_slock);
 194 decl_simple_lock_data(,vnode_free_list_slock);
 195 decl_simple_lock_data(,spechash_slock);
 196
 197 /*
 198  * vnodetarget is the amount of vnodes we expect to get back
 199  * from the the inactive vnode list and VM object cache.
 200  * As vnreclaim() is a mainly cpu bound operation for faster
 201  * processers this number could be higher.
 202  * Having this number too high introduces longer delays in
 203  * the execution of getnewvnode().
 204  */
 205 unsigned long vnodetarget;              /* target for vnreclaim() */
 206 #define VNODE_FREE_TARGET       20      /* Default value for vnodetarget */
 207
 208 /*
 209  * We need quite a few vnodes on the free list to sustain the
 210  * rapid stat() the compilation process does, and still benefit from the name
 211  * cache. Having too few vnodes on the free list causes serious disk
 212  * thrashing as we cycle through them.
 213  */
 214 #define VNODE_FREE_MIN          300     /* freelist should have at least these many */
 215
 216 /*
 217  * We need to get vnodes back from the VM object cache when a certain #
 218  * of vnodes are reused from the freelist. This is essential for the
 219  * caching to be effective in the namecache and the buffer cache [for the
 220  * metadata].
 221  */
 222 #define VNODE_TOOMANY_REUSED    (VNODE_FREE_MIN/4)
 223
 224 /*
 225  * If we have enough vnodes on the freelist we do not want to reclaim
 226  * the vnodes from the VM object cache.
 227  */
 228 #define VNODE_FREE_ENOUGH       (VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
 229
 230 /*
 231  * Initialize the vnode management data structures.
 232  */
 233 __private_extern__ void
 234 vntblinit()
 235 {
 236         extern struct lock__bsd__       exchangelock;
 237
 238         simple_lock_init(&mountlist_slock);
 239         simple_lock_init(&mntvnode_slock);
 240         simple_lock_init(&mntid_slock);
 241         simple_lock_init(&spechash_slock);
 242         TAILQ_INIT(&vnode_free_list);
 243         simple_lock_init(&vnode_free_list_slock);
 244         TAILQ_INIT(&vnode_inactive_list);
 245         CIRCLEQ_INIT(&mountlist);
 246     lockinit(&exchangelock, PVFS, "exchange", 0, 0);
 247
 248         if (!vnodetarget)
 249                 vnodetarget = VNODE_FREE_TARGET;
 250
 251         /*
 252          * Scale the vm_object_cache to accomodate the vnodes
 253          * we want to cache
 254          */
 255         (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
 256 }
 257
 258 /* Reset the VM Object Cache with the values passed in */
 259 __private_extern__ kern_return_t
 260 reset_vmobjectcache(unsigned int val1, unsigned int val2)
 261 {
 262         vm_size_t oval = val1 - VNODE_FREE_MIN;
 263         vm_size_t nval = val2 - VNODE_FREE_MIN;
 264
 265         return(adjust_vm_object_cache(oval, nval));
 266 }
 267
 268 /*
 269  * Mark a mount point as busy. Used to synchronize access and to delay
 270  * unmounting. Interlock is not released on failure.
 271  */
 272 int
 273 vfs_busy(mp, flags, interlkp, p)
 274         struct mount *mp;
 275         int flags;
 276         struct slock *interlkp;
 277         struct proc *p;
 278 {
 279         int lkflags;
 280
 281         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 282                 if (flags & LK_NOWAIT)
 283                         return (ENOENT);
 284                 mp->mnt_kern_flag |= MNTK_MWAIT;
 285                 if (interlkp)
 286                         simple_unlock(interlkp);
 287                 /*
 288                  * Since all busy locks are shared except the exclusive
 289                  * lock granted when unmounting, the only place that a
 290                  * wakeup needs to be done is at the release of the
 291                  * exclusive lock at the end of dounmount.
 292                  */
 293                 sleep((caddr_t)mp, PVFS);
 294                 if (interlkp)
 295                         simple_lock(interlkp);
 296                 return (ENOENT);
 297         }
 298         lkflags = LK_SHARED;
 299         if (interlkp)
 300                 lkflags |= LK_INTERLOCK;
 301         if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
 302                 panic("vfs_busy: unexpected lock failure");
 303         return (0);
 304 }
 305
 306 /*
 307  * Free a busy filesystem.
 308  */
 309 void
 310 vfs_unbusy(mp, p)
 311         struct mount *mp;
 312         struct proc *p;
 313 {
 314
 315         lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
 316 }
 317
 318 /*
 319  * Lookup a filesystem type, and if found allocate and initialize
 320  * a mount structure for it.
 321  *
 322  * Devname is usually updated by mount(8) after booting.
 323  */
 324 int
 325 vfs_rootmountalloc(fstypename, devname, mpp)
 326         char *fstypename;
 327         char *devname;
 328         struct mount **mpp;
 329 {
 330         struct proc *p = current_proc();        /* XXX */
 331         struct vfsconf *vfsp;
 332         struct mount *mp;
 333
 334         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 335                 if (!strcmp(vfsp->vfc_name, fstypename))
 336                         break;
 337         if (vfsp == NULL)
 338                 return (ENODEV);
 339         mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
 340         bzero((char *)mp, (u_long)sizeof(struct mount));
 341
 342     /* Initialize the default IO constraints */
 343     mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 344     mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 345
 346         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
 347         (void)vfs_busy(mp, LK_NOWAIT, 0, p);
 348         LIST_INIT(&mp->mnt_vnodelist);
 349         mp->mnt_vfc = vfsp;
 350         mp->mnt_op = vfsp->vfc_vfsops;
 351         mp->mnt_flag = MNT_RDONLY;
 352         mp->mnt_vnodecovered = NULLVP;
 353         vfsp->vfc_refcount++;
 354         mp->mnt_stat.f_type = vfsp->vfc_typenum;
 355         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 356         strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
 357         mp->mnt_stat.f_mntonname[0] = '/';
 358         (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
 359         *mpp = mp;
 360         return (0);
 361 }
 362
 363 /*
 364  * Find an appropriate filesystem to use for the root. If a filesystem
 365  * has not been preselected, walk through the list of known filesystems
 366  * trying those that have mountroot routines, and try them until one
 367  * works or we have tried them all.
 368  */
 369 int
 370 vfs_mountroot()
 371 {
 372         struct vfsconf *vfsp;
 373         extern int (*mountroot)(void);
 374         int error;
 375
 376         if (mountroot != NULL) {
 377                 error = (*mountroot)();
 378                 return (error);
 379         }
 380
 381         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
 382                 if (vfsp->vfc_mountroot == NULL)
 383                         continue;
 384                 if ((error = (*vfsp->vfc_mountroot)()) == 0)
 385                         return (0);
 386                 if (error != EINVAL)
 387                         printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
 388         }
 389         return (ENODEV);
 390 }
 391
 392 /*
 393  * Lookup a mount point by filesystem identifier.
 394  */
 395 struct mount *
 396 vfs_getvfs(fsid)
 397         fsid_t *fsid;
 398 {
 399         register struct mount *mp;
 400
 401         simple_lock(&mountlist_slock);
 402         for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
 403              mp = mp->mnt_list.cqe_next) {
 404                 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
 405                     mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
 406                         simple_unlock(&mountlist_slock);
 407                         return (mp);
 408                 }
 409         }
 410         simple_unlock(&mountlist_slock);
 411         return ((struct mount *)0);
 412 }
 413
 414 /*
 415  * Get a new unique fsid
 416  */
 417 void
 418 vfs_getnewfsid(mp)
 419         struct mount *mp;
 420 {
 421 static u_short xxxfs_mntid;
 422
 423         fsid_t tfsid;
 424         int mtype;
 425
 426         simple_lock(&mntid_slock);
 427         mtype = mp->mnt_vfc->vfc_typenum;
 428         mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
 429         mp->mnt_stat.f_fsid.val[1] = mtype;
 430         if (xxxfs_mntid == 0)
 431                 ++xxxfs_mntid;
 432         tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
 433         tfsid.val[1] = mtype;
 434         if (mountlist.cqh_first != (void *)&mountlist) {
 435                 while (vfs_getvfs(&tfsid)) {
 436                         tfsid.val[0]++;
 437                         xxxfs_mntid++;
 438                 }
 439         }
 440         mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
 441         simple_unlock(&mntid_slock);
 442 }
 443
 444 /*
 445  * Set vnode attributes to VNOVAL
 446  */
 447 void
 448 vattr_null(vap)
 449         register struct vattr *vap;
 450 {
 451
 452         vap->va_type = VNON;
 453         vap->va_size = vap->va_bytes = VNOVAL;
 454         vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
 455                 vap->va_fsid = vap->va_fileid =
 456                 vap->va_blocksize = vap->va_rdev =
 457                 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
 458                 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
 459                 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
 460                 vap->va_flags = vap->va_gen = VNOVAL;
 461         vap->va_vaflags = 0;
 462 }
 463
 464 /*
 465  * Routines having to do with the management of the vnode table.
 466  */
 467 extern int (**dead_vnodeop_p)(void *);
 468 static void vclean __P((struct vnode *vp, int flag, struct proc *p));
 469 extern void vgonel __P((struct vnode *vp, struct proc *p));
 470 long numvnodes, freevnodes;
 471 long inactivevnodes;
 472 long vnode_reclaim_tried;
 473 long vnode_objects_reclaimed;
 474
 475
 476 extern struct vattr va_null;
 477
 478 /*
 479  * Return the next vnode from the free list.
 480  */
 481 int
 482 getnewvnode(tag, mp, vops, vpp)
 483         enum vtagtype tag;
 484         struct mount *mp;
 485         int (**vops)(void *);
 486         struct vnode **vpp;
 487 {
 488         struct proc *p = current_proc();        /* XXX */
 489         struct vnode *vp;
 490         int cnt, didretry = 0;
 491         static int reused = 0;                          /* track the reuse rate */
 492         int reclaimhits = 0;
 493
 494 retry:
 495         simple_lock(&vnode_free_list_slock);
 496         /*
 497          * MALLOC a vnode if the number of vnodes has not reached the desired
 498          * value and the number on the free list is still reasonable...
 499          * reuse from the freelist even though we may evict a name cache entry
 500          * to reduce the number of vnodes that accumulate.... vnodes tie up
 501          * wired memory and are never garbage collected
 502          */
 503         if (numvnodes < desiredvnodes && (freevnodes < (2 * VNODE_FREE_MIN))) {
 504                 numvnodes++;
 505                 simple_unlock(&vnode_free_list_slock);
 506                 MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
 507                 bzero((char *)vp, sizeof *vp);
 508                 VLISTNONE(vp);          /* avoid double queue removal */
 509                 simple_lock_init(&vp->v_interlock);
 510                 goto done;
 511         }
 512
 513         /*
 514          * Once the desired number of vnodes are allocated,
 515          * we start reusing the vnodes.
 516          */
 517         if (freevnodes < VNODE_FREE_MIN) {
 518                 /*
 519                  * if we are low on vnodes on the freelist attempt to get
 520                  * some back from the inactive list and VM object cache
 521                  */
 522                 simple_unlock(&vnode_free_list_slock);
 523                 (void)vnreclaim(vnodetarget);
 524                 simple_lock(&vnode_free_list_slock);
 525         }
 526         if (numvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED) {
 527                 reused = 0;
 528                 if (freevnodes < VNODE_FREE_ENOUGH) {
 529                         simple_unlock(&vnode_free_list_slock);
 530                         (void)vnreclaim(vnodetarget);
 531                         simple_lock(&vnode_free_list_slock);
 532                 }
 533         }
 534
 535         for (cnt = 0, vp = vnode_free_list.tqh_first;
 536                         vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
 537                 if (simple_lock_try(&vp->v_interlock)) {
 538                         /* got the interlock */
 539                         if (ISSET(vp->v_flag, VORECLAIM)) {
 540                                 /* skip over the vnodes that are being reclaimed */
 541                                 simple_unlock(&vp->v_interlock);
 542                                 reclaimhits++;
 543                         } else
 544                         break;
 545         }
 546         }
 547
 548         /*
 549          * Unless this is a bad time of the month, at most
 550          * the first NCPUS items on the free list are
 551          * locked, so this is close enough to being empty.
 552          */
 553         if (vp == NULLVP) {
 554                 simple_unlock(&vnode_free_list_slock);
 555                 if (!(didretry++) && (vnreclaim(vnodetarget) > 0))
 556                         goto retry;
 557                 tablefull("vnode");
 558                 log(LOG_EMERG, "%d vnodes locked, %d desired, %d numvnodes, "
 559                         "%d free, %d inactive, %d being reclaimed\n",
 560                         cnt, desiredvnodes, numvnodes, freevnodes, inactivevnodes,
 561                         reclaimhits);
 562                 *vpp = 0;
 563                 return (ENFILE);
 564         }
 565
 566         if (vp->v_usecount)
 567                 panic("free vnode isn't: v_type = %d, v_usecount = %d?",
 568                                 vp->v_type, vp->v_usecount);
 569
 570         VREMFREE("getnewvnode", vp);
 571         reused++;
 572         simple_unlock(&vnode_free_list_slock);
 573         vp->v_lease = NULL;
 574         cache_purge(vp);
 575         if (vp->v_type != VBAD)
 576                 vgonel(vp, p);  /* clean and reclaim the vnode */
 577         else
 578                 simple_unlock(&vp->v_interlock);
 579 #if DIAGNOSTIC
 580         if (vp->v_data)
 581                 panic("cleaned vnode isn't");
 582         {
 583         int s = splbio();
 584         if (vp->v_numoutput)
 585                 panic("Clean vnode has pending I/O's");
 586         splx(s);
 587         }
 588 #endif
 589         if (UBCINFOEXISTS(vp))
 590                 panic("getnewvnode: ubcinfo not cleaned");
 591         else
 592                 vp->v_ubcinfo = 0;
 593
 594         vp->v_lastr = -1;
 595         vp->v_ralen = 0;
 596         vp->v_maxra = 0;
 597         vp->v_lastw = 0;
 598         vp->v_ciosiz = 0;
 599         vp->v_cstart = 0;
 600         vp->v_clen = 0;
 601         vp->v_socket = 0;
 602
 603 done:
 604         vp->v_flag = VSTANDARD;
 605         vp->v_type = VNON;
 606         vp->v_tag = tag;
 607         vp->v_op = vops;
 608         insmntque(vp, mp);
 609         *vpp = vp;
 610         vp->v_usecount = 1;
 611         vp->v_data = 0;
 612         return (0);
 613 }
 614
 615 /*
 616  * Move a vnode from one mount queue to another.
 617  */
 618 void
 619 insmntque(vp, mp)
 620         struct vnode *vp;
 621         struct mount *mp;
 622 {
 623
 624         simple_lock(&mntvnode_slock);
 625         /*
 626          * Delete from old mount point vnode list, if on one.
 627          */
 628         if (vp->v_mount != NULL)
 629                 LIST_REMOVE(vp, v_mntvnodes);
 630         /*
 631          * Insert into list of vnodes for the new mount point, if available.
 632          */
 633         if ((vp->v_mount = mp) != NULL)
 634                 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
 635         simple_unlock(&mntvnode_slock);
 636 }
 637
 638 /*
 639  * Update outstanding I/O count and do wakeup if requested.
 640  */
 641 void
 642 vwakeup(bp)
 643         register struct buf *bp;
 644 {
 645         register struct vnode *vp;
 646
 647         CLR(bp->b_flags, B_WRITEINPROG);
 648         if (vp = bp->b_vp) {
 649                 if (--vp->v_numoutput < 0)
 650                         panic("vwakeup: neg numoutput");
 651                 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
 652                         if (vp->v_numoutput < 0)
 653                                 panic("vwakeup: neg numoutput 2");
 654                         vp->v_flag &= ~VBWAIT;
 655                         wakeup((caddr_t)&vp->v_numoutput);
 656                 }
 657         }
 658 }
 659
 660 /*
 661  * Flush out and invalidate all buffers associated with a vnode.
 662  * Called with the underlying object locked.
 663  */
 664 int
 665 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
 666         register struct vnode *vp;
 667         int flags;
 668         struct ucred *cred;
 669         struct proc *p;
 670         int slpflag, slptimeo;
 671 {
 672         register struct buf *bp;
 673         struct buf *nbp, *blist;
 674         int s, error = 0;
 675
 676         if (flags & V_SAVE) {
 677                 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
 678                         return (error);
 679                 }
 680                 if (vp->v_dirtyblkhd.lh_first != NULL || (vp->v_flag & VHASDIRTY))
 681                         panic("vinvalbuf: dirty bufs");
 682         }
 683
 684         for (;;) {
 685                 if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
 686                         while (blist && blist->b_lblkno < 0)
 687                                 blist = blist->b_vnbufs.le_next;
 688                 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
 689                     (flags & V_SAVEMETA))
 690                         while (blist && blist->b_lblkno < 0)
 691                                 blist = blist->b_vnbufs.le_next;
 692                 if (!blist)
 693                         break;
 694
 695                 for (bp = blist; bp; bp = nbp) {
 696                         nbp = bp->b_vnbufs.le_next;
 697                         if (flags & V_SAVEMETA && bp->b_lblkno < 0)
 698                                 continue;
 699                         s = splbio();
 700                         if (ISSET(bp->b_flags, B_BUSY)) {
 701                                 SET(bp->b_flags, B_WANTED);
 702                                 error = tsleep((caddr_t)bp,
 703                                         slpflag | (PRIBIO + 1), "vinvalbuf",
 704                                         slptimeo);
 705                                 splx(s);
 706                                 if (error) {
 707                                         return (error);
 708                                 }
 709                                 break;
 710                         }
 711                         bremfree(bp);
 712                         SET(bp->b_flags, B_BUSY);
 713                         splx(s);
 714                         /*
 715                          * XXX Since there are no node locks for NFS, I believe
 716                          * there is a slight chance that a delayed write will
 717                          * occur while sleeping just above, so check for it.
 718                          */
 719                         if (ISSET(bp->b_flags, B_DELWRI) && (flags & V_SAVE)) {
 720                                 (void) VOP_BWRITE(bp);
 721                                 break;
 722                         }
 723                         SET(bp->b_flags, B_INVAL);
 724                         brelse(bp);
 725                 }
 726         }
 727         if (!(flags & V_SAVEMETA) &&
 728             (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
 729                 panic("vinvalbuf: flush failed");
 730         return (0);
 731 }
 732
 733 /*
 734  * Associate a buffer with a vnode.
 735  */
 736 void
 737 bgetvp(vp, bp)
 738         register struct vnode *vp;
 739         register struct buf *bp;
 740 {
 741
 742         if (bp->b_vp)
 743                 panic("bgetvp: not free");
 744         VHOLD(vp);
 745         bp->b_vp = vp;
 746         if (vp->v_type == VBLK || vp->v_type == VCHR)
 747                 bp->b_dev = vp->v_rdev;
 748         else
 749                 bp->b_dev = NODEV;
 750         /*
 751          * Insert onto list for new vnode.
 752          */
 753         bufinsvn(bp, &vp->v_cleanblkhd);
 754 }
 755
 756 /*
 757  * Disassociate a buffer from a vnode.
 758  */
 759 void
 760 brelvp(bp)
 761         register struct buf *bp;
 762 {
 763         struct vnode *vp;
 764
 765         if (bp->b_vp == (struct vnode *) 0)
 766                 panic("brelvp: NULL");
 767         /*
 768          * Delete from old vnode list, if on one.
 769          */
 770         if (bp->b_vnbufs.le_next != NOLIST)
 771                 bufremvn(bp);
 772         vp = bp->b_vp;
 773         bp->b_vp = (struct vnode *) 0;
 774         HOLDRELE(vp);
 775 }
 776
 777 /*
 778  * Reassign a buffer from one vnode to another.
 779  * Used to assign file specific control information
 780  * (indirect blocks) to the vnode to which they belong.
 781  */
 782 void
 783 reassignbuf(bp, newvp)
 784         register struct buf *bp;
 785         register struct vnode *newvp;
 786 {
 787         register struct buflists *listheadp;
 788
 789         if (newvp == NULL) {
 790                 printf("reassignbuf: NULL");
 791                 return;
 792         }
 793         /*
 794          * Delete from old vnode list, if on one.
 795          */
 796         if (bp->b_vnbufs.le_next != NOLIST)
 797                 bufremvn(bp);
 798         /*
 799          * If dirty, put on list of dirty buffers;
 800          * otherwise insert onto list of clean buffers.
 801          */
 802         if (ISSET(bp->b_flags, B_DELWRI))
 803                 listheadp = &newvp->v_dirtyblkhd;
 804         else
 805                 listheadp = &newvp->v_cleanblkhd;
 806         bufinsvn(bp, listheadp);
 807 }
 808
 809 /*
 810  * Create a vnode for a block device.
 811  * Used for root filesystem, argdev, and swap areas.
 812  * Also used for memory file system special devices.
 813  */
 814 int
 815 bdevvp(dev, vpp)
 816         dev_t dev;
 817         struct vnode **vpp;
 818 {
 819         register struct vnode *vp;
 820         struct vnode *nvp;
 821         int error;
 822
 823         if (dev == NODEV) {
 824                 *vpp = NULLVP;
 825                 return (ENODEV);
 826         }
 827         error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
 828         if (error) {
 829                 *vpp = NULLVP;
 830                 return (error);
 831         }
 832         vp = nvp;
 833         vp->v_type = VBLK;
 834         if (nvp = checkalias(vp, dev, (struct mount *)0)) {
 835                 vput(vp);
 836                 vp = nvp;
 837         }
 838         *vpp = vp;
 839         return (0);
 840 }
 841
 842 /*
 843  * Check to see if the new vnode represents a special device
 844  * for which we already have a vnode (either because of
 845  * bdevvp() or because of a different vnode representing
 846  * the same block device). If such an alias exists, deallocate
 847  * the existing contents and return the aliased vnode. The
 848  * caller is responsible for filling it with its new contents.
 849  */
 850 struct vnode *
 851 checkalias(nvp, nvp_rdev, mp)
 852         register struct vnode *nvp;
 853         dev_t nvp_rdev;
 854         struct mount *mp;
 855 {
 856         struct proc *p = current_proc();        /* XXX */
 857         struct vnode *vp;
 858         struct vnode **vpp;
 859         struct specinfo * bufhold;
 860         int buffree = 1;
 861
 862         if (nvp->v_type != VBLK && nvp->v_type != VCHR)
 863                 return (NULLVP);
 864
 865         bufhold = (struct specinfo *)_MALLOC_ZONE(sizeof(struct specinfo),
 866                         M_VNODE, M_WAITOK);
 867         vpp = &speclisth[SPECHASH(nvp_rdev)];
 868 loop:
 869         simple_lock(&spechash_slock);
 870         for (vp = *vpp; vp; vp = vp->v_specnext) {
 871                 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
 872                         continue;
 873                 /*
 874                  * Alias, but not in use, so flush it out.
 875                  */
 876                 simple_lock(&vp->v_interlock);
 877                 if (vp->v_usecount == 0) {
 878                         simple_unlock(&spechash_slock);
 879                         vgonel(vp, p);
 880                         goto loop;
 881                 }
 882                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
 883                         simple_unlock(&spechash_slock);
 884                         goto loop;
 885                 }
 886                 break;
 887         }
 888         if (vp == NULL || vp->v_tag != VT_NON) {
 889                 nvp->v_specinfo = bufhold;
 890                 buffree = 0;    /* buffer used */
 891                 bzero(nvp->v_specinfo, sizeof(struct specinfo));
 892                 nvp->v_rdev = nvp_rdev;
 893                 nvp->v_hashchain = vpp;
 894                 nvp->v_specnext = *vpp;
 895                 nvp->v_specflags = 0;
 896                 simple_unlock(&spechash_slock);
 897                 *vpp = nvp;
 898                 if (vp != NULLVP) {
 899                         nvp->v_flag |= VALIASED;
 900                         vp->v_flag |= VALIASED;
 901                         vput(vp);
 902                 }
 903                 /* Since buffer is used just return */
 904                 return (NULLVP);
 905         }
 906         simple_unlock(&spechash_slock);
 907         VOP_UNLOCK(vp, 0, p);
 908         simple_lock(&vp->v_interlock);
 909         vclean(vp, 0, p);
 910         vp->v_op = nvp->v_op;
 911         vp->v_tag = nvp->v_tag;
 912         nvp->v_type = VNON;
 913         insmntque(vp, mp);
 914         if (buffree)
 915                 _FREE_ZONE((void *)bufhold, sizeof (struct specinfo), M_VNODE);
 916         return (vp);
 917 }
 918
 919 /*
 920  * Get a reference on a particular vnode and lock it if requested.
 921  * If the vnode was on the inactive list, remove it from the list.
 922  * If the vnode was on the free list, remove it from the list and
 923  * move it to inactive list as needed.
 924  * The vnode lock bit is set if the vnode is being eliminated in
 925  * vgone. The process is awakened when the transition is completed,
 926  * and an error returned to indicate that the vnode is no longer
 927  * usable (possibly having been changed to a new file system type).
 928  */
 929 int
 930 vget(vp, flags, p)
 931         struct vnode *vp;
 932         int flags;
 933         struct proc *p;
 934 {
 935         int error = 0;
 936
 937         /*
 938          * If the vnode is in the process of being cleaned out for
 939          * another use, we wait for the cleaning to finish and then
 940          * return failure. Cleaning is determined by checking that
 941          * the VXLOCK flag is set.
 942          */
 943         if ((flags & LK_INTERLOCK) == 0)
 944                 simple_lock(&vp->v_interlock);
 945         if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
 946                 vp->v_flag |= VXWANT;
 947                 simple_unlock(&vp->v_interlock);
 948                 (void)tsleep((caddr_t)vp, PINOD, "vget", 0);
 949                 return (ENOENT);
 950         }
 951
 952         /*
 953          * vnode is being terminated.
 954          * wait for vnode_pager_no_senders() to clear VTERMINATE
 955          */
 956         if (ISSET(vp->v_flag, VTERMINATE)) {
 957                 SET(vp->v_flag, VTERMWANT);
 958                 simple_unlock(&vp->v_interlock);
 959                 (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
 960                 return (ENOENT);
 961         }
 962
 963         simple_lock(&vnode_free_list_slock);
 964         if (vp->v_usecount == 0) {
 965                 /* If on the free list, remove it from there */
 966                 if (VONLIST(vp))
 967                         VREMFREE("vget", vp);
 968         } else {
 969                 /* If on the inactive list, remove it from there */
 970                 if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
 971                         if (VONLIST(vp))
 972                                 VREMINACTIVE("vget", vp);
 973                 }
 974         }
 975
 976         /* The vnode should not be on the inactive list here */
 977         VINACTIVECHECK("vget", vp, 0);
 978
 979         simple_unlock(&vnode_free_list_slock);
 980
 981         if (++vp->v_usecount <= 0)
 982                 panic("vget: v_usecount");
 983
 984         /*
 985          * Recover named reference as needed
 986          */
 987         if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) {
 988                 simple_unlock(&vp->v_interlock);
 989                 if (ubc_getobject(vp, UBC_HOLDOBJECT)) {
 990                         error = ENOENT;
 991                         goto errout;
 992                 }
 993                 simple_lock(&vp->v_interlock);
 994         }
 995
 996         if (flags & LK_TYPE_MASK) {
 997                 if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
 998                         goto errout;
 999                 return (0);
1000         }
1001
1002         if ((flags & LK_INTERLOCK) == 0)
1003                 simple_unlock(&vp->v_interlock);
1004         return (0);
1005
1006 errout:
1007         /*
1008          * If the vnode was not active in the first place
1009          * must not call vrele() as VOP_INACTIVE() is not
1010          * required.
1011          * So inlined part of vrele() here.
1012          */
1013         simple_lock(&vp->v_interlock);
1014         if (--vp->v_usecount == 1) {
1015                 if (UBCINFOEXISTS(vp)) {
1016                         vinactive(vp);
1017                         simple_unlock(&vp->v_interlock);
1018                         return (error);
1019                 }
1020         }
1021         if (vp->v_usecount > 0) {
1022                 simple_unlock(&vp->v_interlock);
1023                 return (error);
1024         }
1025         if (vp->v_usecount < 0)
1026                 panic("vget: negative usecount (%d)", vp->v_usecount);
1027         vfree(vp);
1028         simple_unlock(&vp->v_interlock);
1029         return (error);
1030 }
1031
1032 /*
1033  * Get a pager reference on the particular vnode.
1034  *
1035  * This is called from ubc_info_init() and it is asumed that
1036  * the vnode is neither on the free list on on the inactive list.
1037  * It is also assumed that the vnode is neither being recycled
1038  * by vgonel nor being terminated by vnode_pager_vrele().
1039  *
1040  * The vnode interlock is NOT held by the caller.
1041  */
1042 __private_extern__ int
1043 vnode_pager_vget(vp)
1044         struct vnode *vp;
1045 {
1046         simple_lock(&vp->v_interlock);
1047         if (UBCINFOMISSING(vp))
1048                 panic("vnode_pager_vget: stolen ubc_info");
1049
1050         if (!UBCINFOEXISTS(vp))
1051                 panic("vnode_pager_vget: lost ubc_info");
1052
1053         if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM))
1054                 panic("vnode_pager_vget: already being reclaimd");
1055
1056         if (ISSET(vp->v_flag, VTERMINATE))
1057                 panic("vnode_pager_vget: already being terminated");
1058
1059         simple_lock(&vnode_free_list_slock);
1060         /* The vnode should not be on ANY list */
1061         if (VONLIST(vp))
1062                 panic("vnode_pager_vget: still on the list");
1063
1064         /* The vnode should not be on the inactive list here */
1065         VINACTIVECHECK("vnode_pager_vget", vp, 0);
1066         simple_unlock(&vnode_free_list_slock);
1067
1068         /* After all those checks, now do the real work :-) */
1069         if (++vp->v_usecount <= 0)
1070                 panic("vnode_pager_vget: v_usecount");
1071         simple_unlock(&vp->v_interlock);
1072
1073         return (0);
1074 }
1075
1076 /*
1077  * Stubs to use when there is no locking to be done on the underlying object.
1078  * A minimal shared lock is necessary to ensure that the underlying object
1079  * is not revoked while an operation is in progress. So, an active shared
1080  * count is maintained in an auxillary vnode lock structure.
1081  */
1082 int
1083 vop_nolock(ap)
1084         struct vop_lock_args /* {
1085                 struct vnode *a_vp;
1086                 int a_flags;
1087                 struct proc *a_p;
1088         } */ *ap;
1089 {
1090 #ifdef notyet
1091         /*
1092          * This code cannot be used until all the non-locking filesystems
1093          * (notably NFS) are converted to properly lock and release nodes.
1094          * Also, certain vnode operations change the locking state within
1095          * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
1096          * and symlink). Ideally these operations should not change the
1097          * lock state, but should be changed to let the caller of the
1098          * function unlock them. Otherwise all intermediate vnode layers
1099          * (such as union, umapfs, etc) must catch these functions to do
1100          * the necessary locking at their layer. Note that the inactive
1101          * and lookup operations also change their lock state, but this
1102          * cannot be avoided, so these two operations will always need
1103          * to be handled in intermediate layers.
1104          */
1105         struct vnode *vp = ap->a_vp;
1106         int vnflags, flags = ap->a_flags;
1107
1108         if (vp->v_vnlock == NULL) {
1109                 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1110                         return (0);
1111                 MALLOC_ZONE(vp->v_vnlock, struct lock__bsd__ *,
1112                                 sizeof(struct lock__bsd__), M_VNODE, M_WAITOK);
1113                 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1114         }
1115         switch (flags & LK_TYPE_MASK) {
1116         case LK_DRAIN:
1117                 vnflags = LK_DRAIN;
1118                 break;
1119         case LK_EXCLUSIVE:
1120         case LK_SHARED:
1121                 vnflags = LK_SHARED;
1122                 break;
1123         case LK_UPGRADE:
1124         case LK_EXCLUPGRADE:
1125         case LK_DOWNGRADE:
1126                 return (0);
1127         case LK_RELEASE:
1128         default:
1129                 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
1130         }
1131         if (flags & LK_INTERLOCK)
1132                 vnflags |= LK_INTERLOCK;
1133         return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
1134 #else /* for now */
1135         /*
1136          * Since we are not using the lock manager, we must clear
1137          * the interlock here.
1138          */
1139         if (ap->a_flags & LK_INTERLOCK)
1140                 simple_unlock(&ap->a_vp->v_interlock);
1141         return (0);
1142 #endif
1143 }
1144
1145 /*
1146  * Decrement the active use count.
1147  */
1148 int
1149 vop_nounlock(ap)
1150         struct vop_unlock_args /* {
1151                 struct vnode *a_vp;
1152                 int a_flags;
1153                 struct proc *a_p;
1154         } */ *ap;
1155 {
1156         struct vnode *vp = ap->a_vp;
1157
1158         if (vp->v_vnlock == NULL)
1159                 return (0);
1160         return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
1161 }
1162
1163 /*
1164  * Return whether or not the node is in use.
1165  */
1166 int
1167 vop_noislocked(ap)
1168         struct vop_islocked_args /* {
1169                 struct vnode *a_vp;
1170         } */ *ap;
1171 {
1172         struct vnode *vp = ap->a_vp;
1173
1174         if (vp->v_vnlock == NULL)
1175                 return (0);
1176         return (lockstatus(vp->v_vnlock));
1177 }
1178
1179 /*
1180  * Vnode reference.
1181  */
1182 void
1183 vref(vp)
1184         struct vnode *vp;
1185 {
1186
1187         simple_lock(&vp->v_interlock);
1188         if (vp->v_usecount <= 0)
1189                 panic("vref used where vget required");
1190
1191         /* If on the inactive list, remove it from there */
1192         if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
1193                 if (VONLIST(vp)) {
1194                         simple_lock(&vnode_free_list_slock);
1195                         VREMINACTIVE("vref", vp);
1196                         simple_unlock(&vnode_free_list_slock);
1197                 }
1198         }
1199         /* The vnode should not be on the inactive list here */
1200         VINACTIVECHECK("vref", vp, 0);
1201
1202         if (++vp->v_usecount <= 0)
1203                 panic("vref v_usecount");
1204         simple_unlock(&vp->v_interlock);
1205 }
1206
1207 /*
1208  * put the vnode on appropriate free list.
1209  * called with v_interlock held.
1210  */
1211 static void
1212 vfree(vp)
1213         struct vnode *vp;
1214 {
1215         /*
1216          * if the vnode is not obtained by calling getnewvnode() we
1217          * are not responsible for the cleanup. Just return.
1218          */
1219         if (!(vp->v_flag & VSTANDARD)) {
1220                 return;
1221         }
1222
1223         if (vp->v_usecount != 0)
1224                 panic("vfree: v_usecount");
1225
1226         /* insert at tail of LRU list or at head if VAGE is set */
1227         simple_lock(&vnode_free_list_slock);
1228
1229         if (VONLIST(vp))
1230                  panic("vfree: vnode still on list");
1231
1232         if (vp->v_flag & VAGE) {
1233                 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1234                 vp->v_flag &= ~VAGE;
1235         } else
1236                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1237         freevnodes++;
1238         simple_unlock(&vnode_free_list_slock);
1239         return;
1240 }
1241
1242 /*
1243  * put the vnode on the inactive list.
1244  * called with v_interlock held
1245  */
1246 static void
1247 vinactive(vp)
1248         struct vnode *vp;
1249 {
1250         if (!UBCINFOEXISTS(vp))
1251                 panic("vinactive: not a UBC vnode");
1252
1253         if (vp->v_usecount != 1)
1254                 panic("vinactive: v_usecount");
1255
1256         simple_lock(&vnode_free_list_slock);
1257
1258         if (VONLIST(vp))
1259                  panic("vinactive: vnode still on list");
1260         VINACTIVECHECK("vinactive", vp, 0);
1261
1262         TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
1263         SET(vp->v_flag, VUINACTIVE);
1264         CLR(vp->v_flag, (VNOCACHE_DATA | VRAOFF));
1265
1266         inactivevnodes++;
1267         simple_unlock(&vnode_free_list_slock);
1268         return;
1269 }
1270
1271
1272 /*
1273  * vput(), just unlock and vrele()
1274  */
1275 void
1276 vput(vp)
1277         struct vnode *vp;
1278 {
1279         struct proc *p = current_proc();        /* XXX */
1280
1281         simple_lock(&vp->v_interlock);
1282         if (--vp->v_usecount == 1) {
1283                 if (UBCINFOEXISTS(vp)) {
1284                         vinactive(vp);
1285                         simple_unlock(&vp->v_interlock);
1286                         VOP_UNLOCK(vp, 0, p);
1287                         return;
1288                 }
1289         }
1290         if (vp->v_usecount > 0) {
1291                 simple_unlock(&vp->v_interlock);
1292                 VOP_UNLOCK(vp, 0, p);
1293                 return;
1294         }
1295 #if DIAGNOSTIC
1296         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1297                 vprint("vput: bad ref count", vp);
1298                 panic("vput: v_usecount = %d, v_writecount = %d",
1299                         vp->v_usecount, vp->v_writecount);
1300         }
1301 #endif
1302         if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1303                 VREMINACTIVE("vrele", vp);
1304
1305         simple_unlock(&vp->v_interlock);
1306         VOP_INACTIVE(vp, p);
1307         /*
1308          * The interlock is not held and
1309          * VOP_INCATIVE releases the vnode lock.
1310          * We could block and the vnode might get reactivated
1311          * Can not just call vfree without checking the state
1312          */
1313         simple_lock(&vp->v_interlock);
1314         if (!VONLIST(vp)) {
1315                 if (vp->v_usecount == 0)
1316                         vfree(vp);
1317                 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1318                         vinactive(vp);
1319         }
1320         simple_unlock(&vp->v_interlock);
1321 }
1322
1323 /*
1324  * Vnode release.
1325  * If count drops to zero, call inactive routine and return to freelist.
1326  */
1327 void
1328 vrele(vp)
1329         struct vnode *vp;
1330 {
1331         struct proc *p = current_proc();        /* XXX */
1332
1333         simple_lock(&vp->v_interlock);
1334         if (--vp->v_usecount == 1) {
1335                 if (UBCINFOEXISTS(vp)) {
1336                         vinactive(vp);
1337                         simple_unlock(&vp->v_interlock);
1338                         return;
1339                 }
1340         }
1341         if (vp->v_usecount > 0) {
1342                 simple_unlock(&vp->v_interlock);
1343                 return;
1344         }
1345 #if DIAGNOSTIC
1346         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1347                 vprint("vrele: bad ref count", vp);
1348                 panic("vrele: ref cnt");
1349         }
1350 #endif
1351         if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1352                 VREMINACTIVE("vrele", vp);
1353
1354
1355         if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
1356                 /* vnode is being cleaned, just return */
1357                 vfree(vp);
1358                 simple_unlock(&vp->v_interlock);
1359                 return;
1360         }
1361
1362         if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1363                 VOP_INACTIVE(vp, p);
1364                 /*
1365                  * vn_lock releases the interlock and
1366                  * VOP_INCATIVE releases the vnode lock.
1367                  * We could block and the vnode might get reactivated
1368                  * Can not just call vfree without checking the state
1369                  */
1370                 simple_lock(&vp->v_interlock);
1371                 if (!VONLIST(vp)) {
1372                         if (vp->v_usecount == 0)
1373                                 vfree(vp);
1374                         else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1375                                 vinactive(vp);
1376                 }
1377                 simple_unlock(&vp->v_interlock);
1378         }
1379 #if 0
1380         else {
1381                 vfree(vp);
1382                 simple_unlock(&vp->v_interlock);
1383                 kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp);
1384         }
1385 #endif
1386 }
1387
1388 void
1389 vagevp(vp)
1390         struct vnode *vp;
1391 {
1392         simple_lock(&vp->v_interlock);
1393         vp->v_flag |= VAGE;
1394         simple_unlock(&vp->v_interlock);
1395         return;
1396 }
1397
1398 /*
1399  * Page or buffer structure gets a reference.
1400  */
1401 void
1402 vhold(vp)
1403         register struct vnode *vp;
1404 {
1405
1406         simple_lock(&vp->v_interlock);
1407         vp->v_holdcnt++;
1408         simple_unlock(&vp->v_interlock);
1409 }
1410
1411 /*
1412  * Page or buffer structure frees a reference.
1413  */
1414 void
1415 holdrele(vp)
1416         register struct vnode *vp;
1417 {
1418
1419         simple_lock(&vp->v_interlock);
1420         if (vp->v_holdcnt <= 0)
1421                 panic("holdrele: holdcnt");
1422         vp->v_holdcnt--;
1423         simple_unlock(&vp->v_interlock);
1424 }
1425
1426 /*
1427  * Remove any vnodes in the vnode table belonging to mount point mp.
1428  *
1429  * If MNT_NOFORCE is specified, there should not be any active ones,
1430  * return error if any are found (nb: this is a user error, not a
1431  * system error). If MNT_FORCE is specified, detach any active vnodes
1432  * that are found.
1433  */
1434 #if DIAGNOSTIC
1435 int busyprt = 0;        /* print out busy vnodes */
1436 #if 0
1437 struct ctldebug debug1 = { "busyprt", &busyprt };
1438 #endif /* 0 */
1439 #endif
1440
1441 int
1442 vflush(mp, skipvp, flags)
1443         struct mount *mp;
1444         struct vnode *skipvp;
1445         int flags;
1446 {
1447         struct proc *p = current_proc();
1448         struct vnode *vp, *nvp;
1449         int busy = 0;
1450
1451         simple_lock(&mntvnode_slock);
1452 loop:
1453         for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1454                 if (vp->v_mount != mp)
1455                         goto loop;
1456                 nvp = vp->v_mntvnodes.le_next;
1457                 /*
1458                  * Skip over a selected vnode.
1459                  */
1460                 if (vp == skipvp)
1461                         continue;
1462
1463                 simple_lock(&vp->v_interlock);
1464                 /*
1465                  * Skip over a vnodes marked VSYSTEM.
1466                  */
1467                 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1468                         simple_unlock(&vp->v_interlock);
1469                         continue;
1470                 }
1471                 /*
1472                  * Skip over a vnodes marked VSWAP.
1473                  */
1474                 if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
1475                         simple_unlock(&vp->v_interlock);
1476                         continue;
1477                 }
1478                 /*
1479                  * If WRITECLOSE is set, only flush out regular file
1480                  * vnodes open for writing.
1481                  */
1482                 if ((flags & WRITECLOSE) &&
1483                     (vp->v_writecount == 0 || vp->v_type != VREG)) {
1484                         simple_unlock(&vp->v_interlock);
1485                         continue;
1486                 }
1487                 /*
1488                  * With v_usecount == 0, all we need to do is clear
1489                  * out the vnode data structures and we are done.
1490                  */
1491                 if (vp->v_usecount == 0) {
1492                         simple_unlock(&mntvnode_slock);
1493                         vgonel(vp, p);
1494                         simple_lock(&mntvnode_slock);
1495                         continue;
1496                 }
1497                 /*
1498                  * If FORCECLOSE is set, forcibly close the vnode.
1499                  * For block or character devices, revert to an
1500                  * anonymous device. For all other files, just kill them.
1501                  */
1502                 if (flags & FORCECLOSE) {
1503                         simple_unlock(&mntvnode_slock);
1504                         if (vp->v_type != VBLK && vp->v_type != VCHR) {
1505                                 vgonel(vp, p);
1506                         } else {
1507                                 vclean(vp, 0, p);
1508                                 vp->v_op = spec_vnodeop_p;
1509                                 insmntque(vp, (struct mount *)0);
1510                         }
1511                         simple_lock(&mntvnode_slock);
1512                         continue;
1513                 }
1514 #if DIAGNOSTIC
1515                 if (busyprt)
1516                         vprint("vflush: busy vnode", vp);
1517 #endif
1518                 simple_unlock(&vp->v_interlock);
1519                 busy++;
1520         }
1521         simple_unlock(&mntvnode_slock);
1522         if (busy)
1523                 return (EBUSY);
1524         return (0);
1525 }
1526
1527 /*
1528  * Disassociate the underlying file system from a vnode.
1529  * The vnode interlock is held on entry.
1530  */
1531 static void
1532 vclean(vp, flags, p)
1533         struct vnode *vp;
1534         int flags;
1535         struct proc *p;
1536 {
1537         int active;
1538         void *obj;
1539         kern_return_t kret;
1540         int removed = 0;
1541         int didhold;
1542
1543         /*
1544          * if the vnode is not obtained by calling getnewvnode() we
1545          * are not responsible for the cleanup. Just return.
1546          */
1547         if (!(vp->v_flag & VSTANDARD)) {
1548                 simple_unlock(&vp->v_interlock);
1549                 return;
1550         }
1551
1552         /*
1553          * Check to see if the vnode is in use.
1554          * If so we have to reference it before we clean it out
1555          * so that its count cannot fall to zero and generate a
1556          * race against ourselves to recycle it.
1557          */
1558         if (active = vp->v_usecount)
1559                 if (++vp->v_usecount <= 0)
1560                         panic("vclean: v_usecount");
1561         /*
1562          * Prevent the vnode from being recycled or
1563          * brought into use while we clean it out.
1564          */
1565         if (vp->v_flag & VXLOCK)
1566                 panic("vclean: deadlock");
1567         vp->v_flag |= VXLOCK;
1568
1569         /*
1570          * Even if the count is zero, the VOP_INACTIVE routine may still
1571          * have the object locked while it cleans it out. The VOP_LOCK
1572          * ensures that the VOP_INACTIVE routine is done with its work.
1573          * For active vnodes, it ensures that no other activity can
1574          * occur while the underlying object is being cleaned out.
1575          */
1576         VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1577
1578         /*
1579          * if this vnode is on the inactive list
1580          * take it off the list.
1581          */
1582         if ((active == 1) &&
1583                 (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))) {
1584                 simple_lock(&vnode_free_list_slock);
1585                 VREMINACTIVE("vclean", vp);
1586                 simple_unlock(&vnode_free_list_slock);
1587                 removed++;
1588         }
1589
1590         /* Clean the pages in VM. */
1591         if (active && (flags & DOCLOSE))
1592                 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1593
1594         /* Clean the pages in VM. */
1595         didhold = ubc_hold(vp);
1596         if ((active) && (didhold))
1597                 (void)ubc_clean(vp, 0); /* do not invalidate */
1598
1599         /*
1600          * Clean out any buffers associated with the vnode.
1601          */
1602         if (flags & DOCLOSE) {
1603                 if (vp->v_tag == VT_NFS)
1604             nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
1605         else
1606             vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1607     }
1608
1609         if (active)
1610                 VOP_INACTIVE(vp, p);
1611         else
1612                 VOP_UNLOCK(vp, 0, p);
1613
1614         /* Destroy ubc named reference */
1615     if (didhold) {
1616         ubc_rele(vp);
1617                 ubc_destroy_named(vp);
1618         }
1619
1620         /*
1621          * Reclaim the vnode.
1622          */
1623         if (VOP_RECLAIM(vp, p))
1624                 panic("vclean: cannot reclaim");
1625         cache_purge(vp);
1626         if (vp->v_vnlock) {
1627                 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1628                         vprint("vclean: lock not drained", vp);
1629                 FREE_ZONE(vp->v_vnlock, sizeof (struct lock__bsd__), M_VNODE);
1630                 vp->v_vnlock = NULL;
1631         }
1632
1633         /* It's dead, Jim! */
1634         vp->v_op = dead_vnodeop_p;
1635         vp->v_tag = VT_NON;
1636
1637         /*
1638          * Done with purge, notify sleepers of the grim news.
1639          */
1640         vp->v_flag &= ~VXLOCK;
1641         if (vp->v_flag & VXWANT) {
1642                 vp->v_flag &= ~VXWANT;
1643                 wakeup((caddr_t)vp);
1644         }
1645
1646         if (active)
1647                 vrele(vp);
1648 }
1649
1650 /*
1651  * Eliminate all activity associated with  the requested vnode
1652  * and with all vnodes aliased to the requested vnode.
1653  */
1654 int
1655 vop_revoke(ap)
1656         struct vop_revoke_args /* {
1657                 struct vnode *a_vp;
1658                 int a_flags;
1659         } */ *ap;
1660 {
1661         struct vnode *vp, *vq;
1662         struct proc *p = current_proc();
1663
1664 #if DIAGNOSTIC
1665         if ((ap->a_flags & REVOKEALL) == 0)
1666                 panic("vop_revoke");
1667 #endif
1668
1669         vp = ap->a_vp;
1670         simple_lock(&vp->v_interlock);
1671
1672         if (vp->v_flag & VALIASED) {
1673                 /*
1674                  * If a vgone (or vclean) is already in progress,
1675                  * wait until it is done and return.
1676                  */
1677                 if (vp->v_flag & VXLOCK) {
1678                         while (vp->v_flag & VXLOCK) {
1679                                 vp->v_flag |= VXWANT;
1680                                 simple_unlock(&vp->v_interlock);
1681                                 (void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1682                         }
1683                         return (0);
1684                 }
1685                 /*
1686                  * Ensure that vp will not be vgone'd while we
1687                  * are eliminating its aliases.
1688                  */
1689                 vp->v_flag |= VXLOCK;
1690                 simple_unlock(&vp->v_interlock);
1691                 while (vp->v_flag & VALIASED) {
1692                         simple_lock(&spechash_slock);
1693                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1694                                 if (vq->v_rdev != vp->v_rdev ||
1695                                     vq->v_type != vp->v_type || vp == vq)
1696                                         continue;
1697                                 simple_unlock(&spechash_slock);
1698                                 vgone(vq);
1699                                 break;
1700                         }
1701                         if (vq == NULLVP)
1702                                 simple_unlock(&spechash_slock);
1703                 }
1704                 /*
1705                  * Remove the lock so that vgone below will
1706                  * really eliminate the vnode after which time
1707                  * vgone will awaken any sleepers.
1708                  */
1709                 simple_lock(&vp->v_interlock);
1710                 vp->v_flag &= ~VXLOCK;
1711         }
1712         vgonel(vp, p);
1713         return (0);
1714 }
1715
1716 /*
1717  * Recycle an unused vnode to the front of the free list.
1718  * Release the passed interlock if the vnode will be recycled.
1719  */
1720 int
1721 vrecycle(vp, inter_lkp, p)
1722         struct vnode *vp;
1723         struct slock *inter_lkp;
1724         struct proc *p;
1725 {
1726
1727         simple_lock(&vp->v_interlock);
1728         if (vp->v_usecount == 0) {
1729                 if (inter_lkp)
1730                         simple_unlock(inter_lkp);
1731                 vgonel(vp, p);
1732                 return (1);
1733         }
1734         simple_unlock(&vp->v_interlock);
1735         return (0);
1736 }
1737
1738 /*
1739  * Eliminate all activity associated with a vnode
1740  * in preparation for reuse.
1741  */
1742 void
1743 vgone(vp)
1744         struct vnode *vp;
1745 {
1746         struct proc *p = current_proc();
1747
1748         simple_lock(&vp->v_interlock);
1749         vgonel(vp, p);
1750 }
1751
1752 /*
1753  * vgone, with the vp interlock held.
1754  */
1755 void
1756 vgonel(vp, p)
1757         struct vnode *vp;
1758         struct proc *p;
1759 {
1760         struct vnode *vq;
1761         struct vnode *vx;
1762
1763         /*
1764          * if the vnode is not obtained by calling getnewvnode() we
1765          * are not responsible for the cleanup. Just return.
1766          */
1767         if (!(vp->v_flag & VSTANDARD)) {
1768                 simple_unlock(&vp->v_interlock);
1769                 return;
1770         }
1771
1772         /*
1773          * If a vgone (or vclean) is already in progress,
1774          * wait until it is done and return.
1775          */
1776         if (vp->v_flag & VXLOCK) {
1777                 while (vp->v_flag & VXLOCK) {
1778                         vp->v_flag |= VXWANT;
1779                         simple_unlock(&vp->v_interlock);
1780                         (void)tsleep((caddr_t)vp, PINOD, "vgone", 0);
1781                 }
1782                 return;
1783         }
1784         /*
1785          * Clean out the filesystem specific data.
1786          */
1787         vclean(vp, DOCLOSE, p);
1788         /*
1789          * Delete from old mount point vnode list, if on one.
1790          */
1791         if (vp->v_mount != NULL)
1792                 insmntque(vp, (struct mount *)0);
1793         /*
1794          * If special device, remove it from special device alias list
1795          * if it is on one.
1796          */
1797         if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1798                 simple_lock(&spechash_slock);
1799                 if (*vp->v_hashchain == vp) {
1800                         *vp->v_hashchain = vp->v_specnext;
1801                 } else {
1802                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1803                                 if (vq->v_specnext != vp)
1804                                         continue;
1805                                 vq->v_specnext = vp->v_specnext;
1806                                 break;
1807                         }
1808                         if (vq == NULL)
1809                                 panic("missing bdev");
1810                 }
1811                 if (vp->v_flag & VALIASED) {
1812                         vx = NULL;
1813                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1814                                 if (vq->v_rdev != vp->v_rdev ||
1815                                     vq->v_type != vp->v_type)
1816                                         continue;
1817                                 if (vx)
1818                                         break;
1819                                 vx = vq;
1820                         }
1821                         if (vx == NULL)
1822                                 panic("missing alias");
1823                         if (vq == NULL)
1824                                 vx->v_flag &= ~VALIASED;
1825                         vp->v_flag &= ~VALIASED;
1826                 }
1827                 simple_unlock(&spechash_slock);
1828                 FREE_ZONE(vp->v_specinfo, sizeof (struct specinfo), M_VNODE);
1829                 vp->v_specinfo = NULL;
1830         }
1831         /*
1832          * If it is on the freelist and not already at the head,
1833          * move it to the head of the list. The test of the back
1834          * pointer and the reference count of zero is because
1835          * it will be removed from the free list by getnewvnode,
1836          * but will not have its reference count incremented until
1837          * after calling vgone. If the reference count were
1838          * incremented first, vgone would (incorrectly) try to
1839          * close the previous instance of the underlying object.
1840          * So, the back pointer is explicitly set to `0xdeadb' in
1841          * getnewvnode after removing it from the freelist to ensure
1842          * that we do not try to move it here.
1843          */
1844         if (vp->v_usecount == 0) {
1845                 simple_lock(&vnode_free_list_slock);
1846                 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1847                     vnode_free_list.tqh_first != vp) {
1848                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1849                         TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1850                 }
1851                 simple_unlock(&vnode_free_list_slock);
1852         }
1853         vp->v_type = VBAD;
1854 }
1855
1856 /*
1857  * Lookup a vnode by device number.
1858  */
1859 int
1860 vfinddev(dev, type, vpp)
1861         dev_t dev;
1862         enum vtype type;
1863         struct vnode **vpp;
1864 {
1865         struct vnode *vp;
1866         int rc = 0;
1867
1868         simple_lock(&spechash_slock);
1869         for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1870                 if (dev != vp->v_rdev || type != vp->v_type)
1871                         continue;
1872                 *vpp = vp;
1873                 rc = 1;
1874                 break;
1875         }
1876         simple_unlock(&spechash_slock);
1877         return (rc);
1878 }
1879
1880 /*
1881  * Calculate the total number of references to a special device.
1882  */
1883 int
1884 vcount(vp)
1885         struct vnode *vp;
1886 {
1887         struct vnode *vq, *vnext;
1888         int count;
1889
1890 loop:
1891         if ((vp->v_flag & VALIASED) == 0)
1892                 return (vp->v_usecount);
1893         simple_lock(&spechash_slock);
1894         for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1895                 vnext = vq->v_specnext;
1896                 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1897                         continue;
1898                 /*
1899                  * Alias, but not in use, so flush it out.
1900                  */
1901                 if (vq->v_usecount == 0 && vq != vp) {
1902                         simple_unlock(&spechash_slock);
1903                         vgone(vq);
1904                         goto loop;
1905                 }
1906                 count += vq->v_usecount;
1907         }
1908         simple_unlock(&spechash_slock);
1909         return (count);
1910 }
1911
1912 int     prtactive = 0;          /* 1 => print out reclaim of active vnodes */
1913
1914 /*
1915  * Print out a description of a vnode.
1916  */
1917 static char *typename[] =
1918    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1919
1920 void
1921 vprint(label, vp)
1922         char *label;
1923         register struct vnode *vp;
1924 {
1925         char buf[64];
1926
1927         if (label != NULL)
1928                 printf("%s: ", label);
1929         printf("type %s, usecount %d, writecount %d, refcount %d,",
1930                 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1931                 vp->v_holdcnt);
1932         buf[0] = '\0';
1933         if (vp->v_flag & VROOT)
1934                 strcat(buf, "|VROOT");
1935         if (vp->v_flag & VTEXT)
1936                 strcat(buf, "|VTEXT");
1937         if (vp->v_flag & VSYSTEM)
1938                 strcat(buf, "|VSYSTEM");
1939         if (vp->v_flag & VXLOCK)
1940                 strcat(buf, "|VXLOCK");
1941         if (vp->v_flag & VXWANT)
1942                 strcat(buf, "|VXWANT");
1943         if (vp->v_flag & VBWAIT)
1944                 strcat(buf, "|VBWAIT");
1945         if (vp->v_flag & VALIASED)
1946                 strcat(buf, "|VALIASED");
1947         if (buf[0] != '\0')
1948                 printf(" flags (%s)", &buf[1]);
1949         if (vp->v_data == NULL) {
1950                 printf("\n");
1951         } else {
1952                 printf("\n\t");
1953                 VOP_PRINT(vp);
1954         }
1955 }
1956
1957 #ifdef DEBUG
1958 /*
1959  * List all of the locked vnodes in the system.
1960  * Called when debugging the kernel.
1961  */
1962 void
1963 printlockedvnodes()
1964 {
1965         struct proc *p = current_proc();
1966         struct mount *mp, *nmp;
1967         struct vnode *vp;
1968
1969         printf("Locked vnodes\n");
1970         simple_lock(&mountlist_slock);
1971         for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1972                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1973                         nmp = mp->mnt_list.cqe_next;
1974                         continue;
1975                 }
1976                 for (vp = mp->mnt_vnodelist.lh_first;
1977                      vp != NULL;
1978                      vp = vp->v_mntvnodes.le_next) {
1979                         if (VOP_ISLOCKED(vp))
1980                                 vprint((char *)0, vp);
1981                 }
1982                 simple_lock(&mountlist_slock);
1983                 nmp = mp->mnt_list.cqe_next;
1984                 vfs_unbusy(mp, p);
1985         }
1986         simple_unlock(&mountlist_slock);
1987 }
1988 #endif
1989
1990 /*
1991  * Top level filesystem related information gathering.
1992  */
1993 int
1994 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1995         int *name;
1996         u_int namelen;
1997         void *oldp;
1998         size_t *oldlenp;
1999         void *newp;
2000         size_t newlen;
2001         struct proc *p;
2002 {
2003         struct ctldebug *cdp;
2004         struct vfsconf *vfsp;
2005
2006         if (name[0] == VFS_NUMMNTOPS) {
2007                 extern unsigned int vfs_nummntops;
2008                 return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
2009         }
2010
2011         /* all sysctl names at this level are at least name and field */
2012         if (namelen < 2)
2013                 return (ENOTDIR);               /* overloaded */
2014         if (name[0] != VFS_GENERIC) {
2015                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2016                         if (vfsp->vfc_typenum == name[0])
2017                                 break;
2018                 if (vfsp == NULL)
2019                         return (EOPNOTSUPP);
2020                 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
2021                     oldp, oldlenp, newp, newlen, p));
2022         }
2023         switch (name[1]) {
2024         case VFS_MAXTYPENUM:
2025                 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
2026         case VFS_CONF:
2027                 if (namelen < 3)
2028                         return (ENOTDIR);       /* overloaded */
2029                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2030                         if (vfsp->vfc_typenum == name[2])
2031                                 break;
2032                 if (vfsp == NULL)
2033                         return (EOPNOTSUPP);
2034                 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
2035                     sizeof(struct vfsconf)));
2036         }
2037         return (EOPNOTSUPP);
2038 }
2039
2040 int kinfo_vdebug = 1;
2041 #define KINFO_VNODESLOP 10
2042 /*
2043  * Dump vnode list (via sysctl).
2044  * Copyout address of vnode followed by vnode.
2045  */
2046 /* ARGSUSED */
2047 int
2048 sysctl_vnode(where, sizep, p)
2049         char *where;
2050         size_t *sizep;
2051         struct proc *p;
2052 {
2053         struct mount *mp, *nmp;
2054         struct vnode *nvp, *vp;
2055         char *bp = where, *savebp;
2056         char *ewhere;
2057         int error;
2058
2059 #define VPTRSZ  sizeof (struct vnode *)
2060 #define VNODESZ sizeof (struct vnode)
2061         if (where == NULL) {
2062                 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2063                 return (0);
2064         }
2065         ewhere = where + *sizep;
2066
2067         simple_lock(&mountlist_slock);
2068         for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2069                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2070                         nmp = mp->mnt_list.cqe_next;
2071                         continue;
2072                 }
2073                 savebp = bp;
2074 again:
2075                 simple_lock(&mntvnode_slock);
2076                 for (vp = mp->mnt_vnodelist.lh_first;
2077                      vp != NULL;
2078                      vp = nvp) {
2079                         /*
2080                          * Check that the vp is still associated with
2081                          * this filesystem.  RACE: could have been
2082                          * recycled onto the same filesystem.
2083                          */
2084                         if (vp->v_mount != mp) {
2085                                 simple_unlock(&mntvnode_slock);
2086                                 if (kinfo_vdebug)
2087                                         printf("kinfo: vp changed\n");
2088                                 bp = savebp;
2089                                 goto again;
2090                         }
2091                         nvp = vp->v_mntvnodes.le_next;
2092                         if (bp + VPTRSZ + VNODESZ > ewhere) {
2093                                 simple_unlock(&mntvnode_slock);
2094                                 *sizep = bp - where;
2095                                 return (ENOMEM);
2096                         }
2097                         simple_unlock(&mntvnode_slock);
2098                         if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2099                            (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2100                                 return (error);
2101                         bp += VPTRSZ + VNODESZ;
2102                         simple_lock(&mntvnode_slock);
2103                 }
2104                 simple_unlock(&mntvnode_slock);
2105                 simple_lock(&mountlist_slock);
2106                 nmp = mp->mnt_list.cqe_next;
2107                 vfs_unbusy(mp, p);
2108         }
2109         simple_unlock(&mountlist_slock);
2110
2111         *sizep = bp - where;
2112         return (0);
2113 }
2114
2115 /*
2116  * Check to see if a filesystem is mounted on a block device.
2117  */
2118 int
2119 vfs_mountedon(vp)
2120         struct vnode *vp;
2121 {
2122         struct vnode *vq;
2123         int error = 0;
2124
2125         if (vp->v_specflags & SI_MOUNTEDON)
2126                 return (EBUSY);
2127         if (vp->v_flag & VALIASED) {
2128                 simple_lock(&spechash_slock);
2129                 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2130                         if (vq->v_rdev != vp->v_rdev ||
2131                             vq->v_type != vp->v_type)
2132                                 continue;
2133                         if (vq->v_specflags & SI_MOUNTEDON) {
2134                                 error = EBUSY;
2135                                 break;
2136                         }
2137                 }
2138                 simple_unlock(&spechash_slock);
2139         }
2140         return (error);
2141 }
2142
2143 /*
2144  * Unmount all filesystems. The list is traversed in reverse order
2145  * of mounting to avoid dependencies.
2146  */
2147 __private_extern__ void
2148 vfs_unmountall()
2149 {
2150         struct mount *mp, *nmp;
2151         struct proc *p = current_proc();
2152
2153         /*
2154          * Since this only runs when rebooting, it is not interlocked.
2155          */
2156         for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2157                 nmp = mp->mnt_list.cqe_prev;
2158                 (void) dounmount(mp, MNT_FORCE, p);
2159         }
2160 }
2161
2162 /*
2163  * Build hash lists of net addresses and hang them off the mount point.
2164  * Called by vfs_export() to set up the lists of export addresses.
2165  */
2166 static int
2167 vfs_hang_addrlist(mp, nep, argp)
2168         struct mount *mp;
2169         struct netexport *nep;
2170         struct export_args *argp;
2171 {
2172         register struct netcred *np;
2173         register struct radix_node_head *rnh;
2174         register int i;
2175         struct radix_node *rn;
2176         struct sockaddr *saddr, *smask = 0;
2177         struct domain *dom;
2178         int error;
2179
2180         if (argp->ex_addrlen == 0) {
2181                 if (mp->mnt_flag & MNT_DEFEXPORTED)
2182                         return (EPERM);
2183                 np = &nep->ne_defexported;
2184                 np->netc_exflags = argp->ex_flags;
2185                 np->netc_anon = argp->ex_anon;
2186                 np->netc_anon.cr_ref = 1;
2187                 mp->mnt_flag |= MNT_DEFEXPORTED;
2188                 return (0);
2189         }
2190         i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2191         MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK);
2192         bzero((caddr_t)np, i);
2193         saddr = (struct sockaddr *)(np + 1);
2194         if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
2195                 goto out;
2196         if (saddr->sa_len > argp->ex_addrlen)
2197                 saddr->sa_len = argp->ex_addrlen;
2198         if (argp->ex_masklen) {
2199                 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2200                 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
2201                 if (error)
2202                         goto out;
2203                 if (smask->sa_len > argp->ex_masklen)
2204                         smask->sa_len = argp->ex_masklen;
2205         }
2206         i = saddr->sa_family;
2207         if ((rnh = nep->ne_rtable[i]) == 0) {
2208                 /*
2209                  * Seems silly to initialize every AF when most are not
2210                  * used, do so on demand here
2211                  */
2212                 for (dom = domains; dom; dom = dom->dom_next)
2213                         if (dom->dom_family == i && dom->dom_rtattach) {
2214                                 dom->dom_rtattach((void **)&nep->ne_rtable[i],
2215                                         dom->dom_rtoffset);
2216                                 break;
2217                         }
2218                 if ((rnh = nep->ne_rtable[i]) == 0) {
2219                         error = ENOBUFS;
2220                         goto out;
2221                 }
2222         }
2223         rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
2224                 np->netc_rnodes);
2225         if (rn == 0) {
2226                 /*
2227                  * One of the reasons that rnh_addaddr may fail is that
2228                  * the entry already exists. To check for this case, we
2229                  * look up the entry to see if it is there. If so, we
2230                  * do not need to make a new entry but do return success.
2231                  */
2232                 _FREE(np, M_NETADDR);
2233                 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
2234                 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
2235                     ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
2236                     !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
2237                             (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
2238                         return (0);
2239                 return (EPERM);
2240         }
2241         np->netc_exflags = argp->ex_flags;
2242         np->netc_anon = argp->ex_anon;
2243         np->netc_anon.cr_ref = 1;
2244         return (0);
2245 out:
2246         _FREE(np, M_NETADDR);
2247         return (error);
2248 }
2249
2250 /* ARGSUSED */
2251 static int
2252 vfs_free_netcred(rn, w)
2253         struct radix_node *rn;
2254         caddr_t w;
2255 {
2256         register struct radix_node_head *rnh = (struct radix_node_head *)w;
2257
2258         (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2259         _FREE((caddr_t)rn, M_NETADDR);
2260         return (0);
2261 }
2262
2263 /*
2264  * Free the net address hash lists that are hanging off the mount points.
2265  */
2266 static void
2267 vfs_free_addrlist(nep)
2268         struct netexport *nep;
2269 {
2270         register int i;
2271         register struct radix_node_head *rnh;
2272
2273         for (i = 0; i <= AF_MAX; i++)
2274                 if (rnh = nep->ne_rtable[i]) {
2275                         (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
2276                             (caddr_t)rnh);
2277                         _FREE((caddr_t)rnh, M_RTABLE);
2278                         nep->ne_rtable[i] = 0;
2279                 }
2280 }
2281
2282 int
2283 vfs_export(mp, nep, argp)
2284         struct mount *mp;
2285         struct netexport *nep;
2286         struct export_args *argp;
2287 {
2288         int error;
2289
2290         if (argp->ex_flags & MNT_DELEXPORT) {
2291                 vfs_free_addrlist(nep);
2292                 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2293         }
2294         if (argp->ex_flags & MNT_EXPORTED) {
2295                 if (error = vfs_hang_addrlist(mp, nep, argp))
2296                         return (error);
2297                 mp->mnt_flag |= MNT_EXPORTED;
2298         }
2299         return (0);
2300 }
2301
2302 struct netcred *
2303 vfs_export_lookup(mp, nep, nam)
2304         register struct mount *mp;
2305         struct netexport *nep;
2306         struct mbuf *nam;
2307 {
2308         register struct netcred *np;
2309         register struct radix_node_head *rnh;
2310         struct sockaddr *saddr;
2311
2312         np = NULL;
2313         if (mp->mnt_flag & MNT_EXPORTED) {
2314                 /*
2315                  * Lookup in the export list first.
2316                  */
2317                 if (nam != NULL) {
2318                         saddr = mtod(nam, struct sockaddr *);
2319                         rnh = nep->ne_rtable[saddr->sa_family];
2320                         if (rnh != NULL) {
2321                                 np = (struct netcred *)
2322                                         (*rnh->rnh_matchaddr)((caddr_t)saddr,
2323                                                               rnh);
2324                                 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2325                                         np = NULL;
2326                         }
2327                 }
2328                 /*
2329                  * If no address match, use the default if it exists.
2330                  */
2331                 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2332                         np = &nep->ne_defexported;
2333         }
2334         return (np);
2335 }
2336
2337 /*
2338  * try to reclaim vnodes from the memory
2339  * object cache
2340  */
2341 static int
2342 vm_object_cache_reclaim(int count)
2343 {
2344         int cnt;
2345         void vnode_pager_release_from_cache(int *);
2346
2347         /* attempt to reclaim vnodes from VM object cache */
2348         cnt = count;
2349         vnode_pager_release_from_cache(&cnt);
2350         return(cnt);
2351 }
2352
2353 /*
2354  * Release memory object reference held by inactive vnodes
2355  * and then try to reclaim some vnodes from the memory
2356  * object cache
2357  */
2358 static int
2359 vnreclaim(int count)
2360 {
2361         int cnt, i, loopcnt;
2362         void *obj;
2363         struct vnode *vp;
2364         int err;
2365         struct proc *p;
2366         kern_return_t kret;
2367
2368         i = 0;
2369         loopcnt = 0;
2370
2371         /* Try to release "count" vnodes from the inactive list */
2372 restart:
2373         if (++loopcnt > inactivevnodes) {
2374                 /*
2375                  * I did my best trying to reclaim the vnodes.
2376                  * Do not try any more as that would only lead to
2377                  * long latencies. Also in the worst case
2378                  * this can get totally CPU bound.
2379                  * Just fall though and attempt a reclaim of VM
2380                  * object cache
2381                  */
2382                 goto out;
2383         }
2384
2385         simple_lock(&vnode_free_list_slock);
2386         for (vp = TAILQ_FIRST(&vnode_inactive_list);
2387                         (vp != NULLVP) && (i < count);
2388                         vp = TAILQ_NEXT(vp, v_freelist)) {
2389
2390                 if (!simple_lock_try(&vp->v_interlock))
2391                         continue;
2392
2393                 if (vp->v_usecount != 1)
2394                         panic("vnreclaim: v_usecount");
2395
2396                 if(!UBCINFOEXISTS(vp)) {
2397                         if (vp->v_type == VBAD) {
2398                                 VREMINACTIVE("vnreclaim", vp);
2399                                 simple_unlock(&vp->v_interlock);
2400                                 continue;
2401                         } else
2402                                 panic("non UBC vnode on inactive list");
2403                                 /* Should not reach here */
2404                 }
2405
2406                 /* If vnode is already being reclaimed, wait */
2407                 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
2408                         vp->v_flag |= VXWANT;
2409                         simple_unlock(&vp->v_interlock);
2410                         simple_unlock(&vnode_free_list_slock);
2411                         (void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
2412                         goto restart;
2413                 }
2414
2415                 VREMINACTIVE("vnreclaim", vp);
2416                 simple_unlock(&vnode_free_list_slock);
2417
2418                 if (ubc_issetflags(vp, UI_WASMAPPED)) {
2419                         /*
2420                          * We should not reclaim as it is likely
2421                          * to be in use. Let it die a natural death.
2422                          * Release the UBC reference if one exists
2423                          * and put it back at the tail.
2424                          */
2425                         simple_unlock(&vp->v_interlock);
2426                         if (ubc_release_named(vp)) {
2427                                 if (UBCINFOEXISTS(vp)) {
2428                                         simple_lock(&vp->v_interlock);
2429                                         if (vp->v_usecount == 1 && !VONLIST(vp))
2430                                                 vinactive(vp);
2431                                         simple_unlock(&vp->v_interlock);
2432                                 }
2433                         } else {
2434                             simple_lock(&vp->v_interlock);
2435                                 vinactive(vp);
2436                                 simple_unlock(&vp->v_interlock);
2437                         }
2438                 } else {
2439                         int didhold;
2440
2441                         VORECLAIM_ENABLE(vp);
2442
2443                         /*
2444                          * scrub the dirty pages and invalidate the buffers
2445                          */
2446                         p = current_proc();
2447                         err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
2448                         if (err) {
2449                                 /* cannot reclaim */
2450                                 simple_lock(&vp->v_interlock);
2451                                 vinactive(vp);
2452                                 VORECLAIM_DISABLE(vp);
2453                                 i++;
2454                                 simple_unlock(&vp->v_interlock);
2455                                 goto restart;
2456                         }
2457
2458                         /* keep the vnode alive so we can kill it */
2459                         simple_lock(&vp->v_interlock);
2460                         if(vp->v_usecount != 1)
2461                                 panic("VOCR: usecount race");
2462                         vp->v_usecount++;
2463                         simple_unlock(&vp->v_interlock);
2464
2465                         /* clean up the state in VM without invalidating */
2466                         didhold = ubc_hold(vp);
2467                         if (didhold)
2468                                 (void)ubc_clean(vp, 0);
2469
2470                         /* flush and invalidate buffers associated with the vnode */
2471                         if (vp->v_tag == VT_NFS)
2472                                 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
2473                         else
2474                                 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
2475
2476                         /*
2477                          * Note: for the v_usecount == 2 case, VOP_INACTIVE
2478                          * has not yet been called.  Call it now while vp is
2479                          * still locked, it will also release the lock.
2480                          */
2481                         if (vp->v_usecount == 2)
2482                                 VOP_INACTIVE(vp, p);
2483                         else
2484                                 VOP_UNLOCK(vp, 0, p);
2485
2486                         if (didhold)
2487                                 ubc_rele(vp);
2488
2489                         /*
2490                          * destroy the ubc named reference.
2491                          * If we can't because it is held for I/Os
2492                          * in progress, just put it back on the inactive
2493                          * list and move on.  Otherwise, the paging reference
2494                          * is toast (and so is this vnode?).
2495                          */
2496                         if (ubc_destroy_named(vp)) {
2497                             i++;
2498                         }
2499                         simple_lock(&vp->v_interlock);
2500                         VORECLAIM_DISABLE(vp);
2501                         simple_unlock(&vp->v_interlock);
2502                         vrele(vp);  /* release extra use we added here */
2503                 }
2504                 /* inactive list lock was released, must restart */
2505                 goto restart;
2506         }
2507         simple_unlock(&vnode_free_list_slock);
2508
2509         vnode_reclaim_tried += i;
2510 out:
2511         i = vm_object_cache_reclaim(count);
2512         vnode_objects_reclaimed += i;
2513
2514         return(i);
2515 }
2516
2517 /*
2518  * This routine is called from vnode_pager_no_senders()
2519  * which in turn can be called with vnode locked by vnode_uncache()
2520  * But it could also get called as a result of vm_object_cache_trim().
2521  * In that case lock state is unknown.
2522  * AGE the vnode so that it gets recycled quickly.
2523  * Check lock status to decide whether to call vput() or vrele().
2524  */
2525 __private_extern__ void
2526 vnode_pager_vrele(struct vnode *vp)
2527 {
2528
2529         boolean_t       funnel_state;
2530         int isvnreclaim = 1;
2531
2532         if (vp == (struct vnode *) NULL)
2533                 panic("vnode_pager_vrele: null vp");
2534
2535         funnel_state = thread_funnel_set(kernel_flock, TRUE);
2536
2537         /* Mark the vnode to be recycled */
2538         vagevp(vp);
2539
2540         simple_lock(&vp->v_interlock);
2541         /*
2542          * If a vgone (or vclean) is already in progress,
2543          * Do not bother with the ubc_info cleanup.
2544          * Let the vclean deal with it.
2545          */
2546         if (vp->v_flag & VXLOCK) {
2547                 CLR(vp->v_flag, VTERMINATE);
2548                 if (ISSET(vp->v_flag, VTERMWANT)) {
2549                         CLR(vp->v_flag, VTERMWANT);
2550                         wakeup((caddr_t)&vp->v_ubcinfo);
2551                 }
2552                 simple_unlock(&vp->v_interlock);
2553                 vrele(vp);
2554                 (void) thread_funnel_set(kernel_flock, funnel_state);
2555                 return;
2556         }
2557
2558         /* It's dead, Jim! */
2559         if (!ISSET(vp->v_flag, VORECLAIM)) {
2560                 /*
2561                  * called as a result of eviction of the memory
2562                  * object from the memory object cache
2563                  */
2564                 isvnreclaim = 0;
2565
2566                 /* So serialize vnode operations */
2567                 VORECLAIM_ENABLE(vp);
2568         }
2569         if (!ISSET(vp->v_flag, VTERMINATE))
2570                 SET(vp->v_flag, VTERMINATE);
2571         if (UBCINFOEXISTS(vp)) {
2572                 struct ubc_info *uip = vp->v_ubcinfo;
2573
2574                 if (ubc_issetflags(vp, UI_WASMAPPED))
2575                         SET(vp->v_flag, VWASMAPPED);
2576
2577                 vp->v_ubcinfo = UBC_NOINFO;  /* catch bad accesses */
2578                 simple_unlock(&vp->v_interlock);
2579                 ubc_info_deallocate(uip);
2580         } else {
2581                 if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
2582                         && ((vp)->v_ubcinfo != UBC_NOINFO)) {
2583                         struct ubc_info *uip = vp->v_ubcinfo;
2584
2585                         vp->v_ubcinfo = UBC_NOINFO;  /* catch bad accesses */
2586                         simple_unlock(&vp->v_interlock);
2587                         ubc_info_deallocate(uip);
2588                 } else {
2589                         simple_unlock(&vp->v_interlock);
2590                 }
2591         }
2592
2593         CLR(vp->v_flag, VTERMINATE);
2594
2595         if (vp->v_type != VBAD){
2596                 vgone(vp);      /* revoke the vnode */
2597                 vrele(vp);      /* and drop the reference */
2598         } else
2599                 vrele(vp);
2600
2601         if (ISSET(vp->v_flag, VTERMWANT)) {
2602                 CLR(vp->v_flag, VTERMWANT);
2603                 wakeup((caddr_t)&vp->v_ubcinfo);
2604         }
2605         if (!isvnreclaim)
2606                 VORECLAIM_DISABLE(vp);
2607         (void) thread_funnel_set(kernel_flock, funnel_state);
2608         return;
2609 }
2610
2611
2612 #if DIAGNOSTIC
2613 int walk_vnodes_debug=0;
2614
2615 void
2616 walk_allvnodes()
2617 {
2618         struct proc *p = current_proc();
2619         struct mount *mp, *nmp;
2620         struct vnode *vp;
2621         int cnt = 0;
2622
2623         for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2624                 for (vp = mp->mnt_vnodelist.lh_first;
2625                      vp != NULL;
2626                      vp = vp->v_mntvnodes.le_next) {
2627                         if (vp->v_usecount < 0){
2628                                 if(walk_vnodes_debug) {
2629                                         printf("vp is %x\n",vp);
2630                                 }
2631                         }
2632                 }
2633                 nmp = mp->mnt_list.cqe_next;
2634         }
2635         for (cnt = 0, vp = vnode_free_list.tqh_first;
2636                 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
2637                 if ((vp->v_usecount < 0) && walk_vnodes_debug) {
2638                         if(walk_vnodes_debug) {
2639                                 printf("vp is %x\n",vp);
2640                         }
2641                 }
2642         }
2643         printf("%d - free\n", cnt);
2644
2645         for (cnt = 0, vp = vnode_inactive_list.tqh_first;
2646                 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
2647                 if ((vp->v_usecount < 0) && walk_vnodes_debug) {
2648                         if(walk_vnodes_debug) {
2649                                 printf("vp is %x\n",vp);
2650                         }
2651                 }
2652         }
2653         printf("%d - inactive\n", cnt);
2654 }
2655 #endif /* DIAGNOSTIC */
2656
2657 void
2658 vfs_io_attributes(vp, flags, iosize, vectors)
2659         struct vnode    *vp;
2660         int     flags;  /* B_READ or B_WRITE */
2661         int     *iosize;
2662         int     *vectors;
2663 {
2664         struct mount *mp;
2665
2666         /* start with "reasonable" defaults */
2667         *iosize = MAXPHYS;
2668         *vectors = 32;
2669
2670         mp = vp->v_mount;
2671         if (mp != NULL) {
2672                 switch (flags) {
2673                 case B_READ:
2674                         *iosize = mp->mnt_maxreadcnt;
2675                         *vectors = mp->mnt_segreadcnt;
2676                         break;
2677                 case B_WRITE:
2678                         *iosize = mp->mnt_maxwritecnt;
2679                         *vectors = mp->mnt_segwritecnt;
2680                         break;
2681                 default:
2682                         break;
2683                 }
2684         }
2685
2686         return;
2687 }
2688
2689 #include <dev/disk.h>
2690
2691 int
2692 vfs_init_io_attributes(devvp, mp)
2693         struct vnode *devvp;
2694         struct mount *mp;
2695 {
2696         int error;
2697         off_t readblockcnt;
2698         off_t writeblockcnt;
2699         off_t readsegcnt;
2700         off_t writesegcnt;
2701         u_long blksize;
2702
2703         u_int64_t temp;
2704
2705         struct proc *p = current_proc();
2706         struct  ucred *cred = p->p_ucred;
2707
2708         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
2709                                 (caddr_t)&readblockcnt, 0, cred, p)))
2710                 return (error);
2711
2712         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
2713                                 (caddr_t)&writeblockcnt, 0, cred, p)))
2714                 return (error);
2715
2716         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
2717                                 (caddr_t)&readsegcnt, 0, cred, p)))
2718                 return (error);
2719
2720         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
2721                                 (caddr_t)&writesegcnt, 0, cred, p)))
2722                 return (error);
2723
2724         if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
2725                                 (caddr_t)&blksize, 0, cred, p)))
2726                 return (error);
2727
2728         temp = readblockcnt * blksize;
2729         temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2730         mp->mnt_maxreadcnt = (u_int32_t)temp;
2731
2732         temp = writeblockcnt * blksize;
2733         temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2734         mp->mnt_maxwritecnt = (u_int32_t)temp;
2735
2736         temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
2737         mp->mnt_segreadcnt = (u_int16_t)temp;
2738
2739         temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
2740         mp->mnt_segwritecnt = (u_int16_t)temp;
2741
2742 #if 0
2743         printf("--- IO attributes for mount point 0x%08x ---\n", mp);
2744         printf("\tmnt_maxreadcnt = 0x%x", mp->mnt_maxreadcnt);
2745         printf("\tmnt_maxwritecnt = 0x%x\n", mp->mnt_maxwritecnt);
2746         printf("\tmnt_segreadcnt = 0x%x", mp->mnt_segreadcnt);
2747         printf("\tmnt_segwritecnt = 0x%x\n", mp->mnt_segwritecnt);
2748 #endif /* 0 */
2749
2750         return (error);
2751 }
2752