bsd/vfs/vfs_subr.c

   1 /*
   2  * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  23 /*
  24  * Copyright (c) 1989, 1993
  25  *      The Regents of the University of California.  All rights reserved.
  26  * (c) UNIX System Laboratories, Inc.
  27  * All or some portions of this file are derived from material licensed
  28  * to the University of California by American Telephone and Telegraph
  29  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  30  * the permission of UNIX System Laboratories, Inc.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed by the University of
  43  *      California, Berkeley and its contributors.
  44  * 4. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      @(#)vfs_subr.c  8.31 (Berkeley) 5/26/95
  61  */
  62
  63 /*
  64  * External virtual filesystem routines
  65  */
  66
  67 #define DIAGNOSTIC 1
  68
  69 #include <sys/param.h>
  70 #include <sys/systm.h>
  71 #include <sys/proc.h>
  72 #include <sys/mount.h>
  73 #include <sys/time.h>
  74 #include <sys/vnode.h>
  75 #include <sys/stat.h>
  76 #include <sys/namei.h>
  77 #include <sys/ucred.h>
  78 #include <sys/buf.h>
  79 #include <sys/errno.h>
  80 #include <sys/malloc.h>
  81 #include <sys/domain.h>
  82 #include <sys/mbuf.h>
  83 #include <sys/syslog.h>
  84 #include <sys/ubc.h>
  85 #include <sys/vm.h>
  86 #include <sys/sysctl.h>
  87
  88 #include <kern/assert.h>
  89
  90 #include <miscfs/specfs/specdev.h>
  91
  92 #include <mach/mach_types.h>
  93 #include <mach/memory_object_types.h>
  94
  95
  96 enum vtype iftovt_tab[16] = {
  97         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
  98         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
  99 };
 100 int     vttoif_tab[9] = {
 101         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
 102         S_IFSOCK, S_IFIFO, S_IFMT,
 103 };
 104
 105 static void vfree(struct vnode *vp);
 106 static void vinactive(struct vnode *vp);
 107 static int vnreclaim(int count);
 108 extern kern_return_t
 109         adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
 110
 111 /*
 112  * Insq/Remq for the vnode usage lists.
 113  */
 114 #define bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
 115 #define bufremvn(bp) {                                                  \
 116         LIST_REMOVE(bp, b_vnbufs);                                      \
 117         (bp)->b_vnbufs.le_next = NOLIST;                                \
 118 }
 119
 120 TAILQ_HEAD(freelst, vnode) vnode_free_list;     /* vnode free list */
 121 TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list;     /* vnode inactive list */
 122 struct mntlist mountlist;                       /* mounted filesystem list */
 123
 124 #if DIAGNOSTIC
 125 #define VLISTCHECK(fun, vp, list)       \
 126         if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
 127                 panic("%s: %s vnode not on %slist", (fun), (list), (list));
 128
 129 #define VINACTIVECHECK(fun, vp, expected)       \
 130         do {    \
 131                 int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE);    \
 132                 if (__is_inactive ^ expected)   \
 133                         panic("%s: %sinactive vnode, expected %s", (fun),       \
 134                                 __is_inactive? "" : "not ",     \
 135                                 expected? "inactive": "not inactive"); \
 136         } while(0)
 137 #else
 138 #define VLISTCHECK(fun, vp, list)
 139 #define VINACTIVECHECK(fun, vp, expected)
 140 #endif /* DIAGNOSTIC */
 141
 142 #define VLISTNONE(vp)   \
 143         do {    \
 144                 (vp)->v_freelist.tqe_next = (struct vnode *)0;  \
 145                 (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb;   \
 146         } while(0)
 147
 148 #define VONLIST(vp)     \
 149         ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
 150
 151 /* remove a vnode from free vnode list */
 152 #define VREMFREE(fun, vp)       \
 153         do {    \
 154                 VLISTCHECK((fun), (vp), "free");        \
 155                 TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist);       \
 156                 VLISTNONE((vp));        \
 157                 freevnodes--;   \
 158         } while(0)
 159
 160 /* remove a vnode from inactive vnode list */
 161 #define VREMINACTIVE(fun, vp)   \
 162         do {    \
 163                 VLISTCHECK((fun), (vp), "inactive"); \
 164                 VINACTIVECHECK((fun), (vp), VUINACTIVE); \
 165                 TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \
 166                 CLR((vp)->v_flag, VUINACTIVE); \
 167                 VLISTNONE((vp));        \
 168                 inactivevnodes--;       \
 169         } while(0)
 170
 171 #define VORECLAIM_ENABLE(vp)   \
 172         do {    \
 173                 if (ISSET((vp)->v_flag, VORECLAIM))     \
 174                         panic("vm object raclaim already");     \
 175                 SET((vp)->v_flag, VORECLAIM);   \
 176         } while(0)
 177
 178 #define VORECLAIM_DISABLE(vp)   \
 179         do {    \
 180                 CLR((vp)->v_flag, VORECLAIM);   \
 181                 if (ISSET((vp)->v_flag, VXWANT)) {      \
 182                         CLR((vp)->v_flag, VXWANT);      \
 183                         wakeup((caddr_t)(vp));  \
 184                 }       \
 185         } while(0)
 186
 187 /*
 188  * Have to declare first two locks as actual data even if !MACH_SLOCKS, since
 189  * a pointers to them get passed around.
 190  */
 191 simple_lock_data_t mountlist_slock;
 192 simple_lock_data_t mntvnode_slock;
 193 decl_simple_lock_data(,mntid_slock);
 194 decl_simple_lock_data(,vnode_free_list_slock);
 195 decl_simple_lock_data(,spechash_slock);
 196
 197 /*
 198  * vnodetarget is the amount of vnodes we expect to get back
 199  * from the the inactive vnode list and VM object cache.
 200  * As vnreclaim() is a mainly cpu bound operation for faster
 201  * processers this number could be higher.
 202  * Having this number too high introduces longer delays in
 203  * the execution of getnewvnode().
 204  */
 205 unsigned long vnodetarget;              /* target for vnreclaim() */
 206 #define VNODE_FREE_TARGET       20      /* Default value for vnodetarget */
 207
 208 /*
 209  * We need quite a few vnodes on the free list to sustain the
 210  * rapid stat() the compilation process does, and still benefit from the name
 211  * cache. Having too few vnodes on the free list causes serious disk
 212  * thrashing as we cycle through them.
 213  */
 214 #define VNODE_FREE_MIN          300     /* freelist should have at least these many */
 215
 216 /*
 217  * We need to get vnodes back from the VM object cache when a certain #
 218  * of vnodes are reused from the freelist. This is essential for the
 219  * caching to be effective in the namecache and the buffer cache [for the
 220  * metadata].
 221  */
 222 #define VNODE_TOOMANY_REUSED    (VNODE_FREE_MIN/4)
 223
 224 /*
 225  * If we have enough vnodes on the freelist we do not want to reclaim
 226  * the vnodes from the VM object cache.
 227  */
 228 #define VNODE_FREE_ENOUGH       (VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
 229
 230 /*
 231  * Initialize the vnode management data structures.
 232  */
 233 __private_extern__ void
 234 vntblinit()
 235 {
 236         extern struct lock__bsd__       exchangelock;
 237
 238         simple_lock_init(&mountlist_slock);
 239         simple_lock_init(&mntvnode_slock);
 240         simple_lock_init(&mntid_slock);
 241         simple_lock_init(&spechash_slock);
 242         TAILQ_INIT(&vnode_free_list);
 243         simple_lock_init(&vnode_free_list_slock);
 244         TAILQ_INIT(&vnode_inactive_list);
 245         CIRCLEQ_INIT(&mountlist);
 246     lockinit(&exchangelock, PVFS, "exchange", 0, 0);
 247
 248         if (!vnodetarget)
 249                 vnodetarget = VNODE_FREE_TARGET;
 250
 251         /*
 252          * Scale the vm_object_cache to accomodate the vnodes
 253          * we want to cache
 254          */
 255         (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
 256 }
 257
 258 /* Reset the VM Object Cache with the values passed in */
 259 __private_extern__ kern_return_t
 260 reset_vmobjectcache(unsigned int val1, unsigned int val2)
 261 {
 262         vm_size_t oval = val1 - VNODE_FREE_MIN;
 263         vm_size_t nval = val2 - VNODE_FREE_MIN;
 264
 265         return(adjust_vm_object_cache(oval, nval));
 266 }
 267
 268 /*
 269  * Mark a mount point as busy. Used to synchronize access and to delay
 270  * unmounting. Interlock is not released on failure.
 271  */
 272 int
 273 vfs_busy(mp, flags, interlkp, p)
 274         struct mount *mp;
 275         int flags;
 276         struct slock *interlkp;
 277         struct proc *p;
 278 {
 279         int lkflags;
 280
 281         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 282                 if (flags & LK_NOWAIT)
 283                         return (ENOENT);
 284                 mp->mnt_kern_flag |= MNTK_MWAIT;
 285                 if (interlkp)
 286                         simple_unlock(interlkp);
 287                 /*
 288                  * Since all busy locks are shared except the exclusive
 289                  * lock granted when unmounting, the only place that a
 290                  * wakeup needs to be done is at the release of the
 291                  * exclusive lock at the end of dounmount.
 292                  */
 293                 sleep((caddr_t)mp, PVFS);
 294                 if (interlkp)
 295                         simple_lock(interlkp);
 296                 return (ENOENT);
 297         }
 298         lkflags = LK_SHARED;
 299         if (interlkp)
 300                 lkflags |= LK_INTERLOCK;
 301         if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
 302                 panic("vfs_busy: unexpected lock failure");
 303         return (0);
 304 }
 305
 306 /*
 307  * Free a busy filesystem.
 308  */
 309 void
 310 vfs_unbusy(mp, p)
 311         struct mount *mp;
 312         struct proc *p;
 313 {
 314
 315         lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
 316 }
 317
 318 /*
 319  * Lookup a filesystem type, and if found allocate and initialize
 320  * a mount structure for it.
 321  *
 322  * Devname is usually updated by mount(8) after booting.
 323  */
 324 int
 325 vfs_rootmountalloc(fstypename, devname, mpp)
 326         char *fstypename;
 327         char *devname;
 328         struct mount **mpp;
 329 {
 330         struct proc *p = current_proc();        /* XXX */
 331         struct vfsconf *vfsp;
 332         struct mount *mp;
 333
 334         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 335                 if (!strcmp(vfsp->vfc_name, fstypename))
 336                         break;
 337         if (vfsp == NULL)
 338                 return (ENODEV);
 339         mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
 340         bzero((char *)mp, (u_long)sizeof(struct mount));
 341
 342     /* Initialize the default IO constraints */
 343     mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 344     mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 345
 346         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
 347         (void)vfs_busy(mp, LK_NOWAIT, 0, p);
 348         LIST_INIT(&mp->mnt_vnodelist);
 349         mp->mnt_vfc = vfsp;
 350         mp->mnt_op = vfsp->vfc_vfsops;
 351         mp->mnt_flag = MNT_RDONLY;
 352         mp->mnt_vnodecovered = NULLVP;
 353         vfsp->vfc_refcount++;
 354         mp->mnt_stat.f_type = vfsp->vfc_typenum;
 355         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 356         strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
 357         mp->mnt_stat.f_mntonname[0] = '/';
 358         (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
 359         *mpp = mp;
 360         return (0);
 361 }
 362
 363 /*
 364  * Find an appropriate filesystem to use for the root. If a filesystem
 365  * has not been preselected, walk through the list of known filesystems
 366  * trying those that have mountroot routines, and try them until one
 367  * works or we have tried them all.
 368  */
 369 int
 370 vfs_mountroot()
 371 {
 372         struct vfsconf *vfsp;
 373         extern int (*mountroot)(void);
 374         int error;
 375
 376         if (mountroot != NULL) {
 377                 error = (*mountroot)();
 378                 return (error);
 379         }
 380
 381         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
 382                 if (vfsp->vfc_mountroot == NULL)
 383                         continue;
 384                 if ((error = (*vfsp->vfc_mountroot)()) == 0)
 385                         return (0);
 386                 if (error != EINVAL)
 387                         printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
 388         }
 389         return (ENODEV);
 390 }
 391
 392 /*
 393  * Lookup a mount point by filesystem identifier.
 394  */
 395 struct mount *
 396 vfs_getvfs(fsid)
 397         fsid_t *fsid;
 398 {
 399         register struct mount *mp;
 400
 401         simple_lock(&mountlist_slock);
 402         for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
 403              mp = mp->mnt_list.cqe_next) {
 404                 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
 405                     mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
 406                         simple_unlock(&mountlist_slock);
 407                         return (mp);
 408                 }
 409         }
 410         simple_unlock(&mountlist_slock);
 411         return ((struct mount *)0);
 412 }
 413
 414 /*
 415  * Get a new unique fsid
 416  */
 417 void
 418 vfs_getnewfsid(mp)
 419         struct mount *mp;
 420 {
 421 static u_short xxxfs_mntid;
 422
 423         fsid_t tfsid;
 424         int mtype;
 425
 426         simple_lock(&mntid_slock);
 427         mtype = mp->mnt_vfc->vfc_typenum;
 428         mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
 429         mp->mnt_stat.f_fsid.val[1] = mtype;
 430         if (xxxfs_mntid == 0)
 431                 ++xxxfs_mntid;
 432         tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
 433         tfsid.val[1] = mtype;
 434         if (mountlist.cqh_first != (void *)&mountlist) {
 435                 while (vfs_getvfs(&tfsid)) {
 436                         tfsid.val[0]++;
 437                         xxxfs_mntid++;
 438                 }
 439         }
 440         mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
 441         simple_unlock(&mntid_slock);
 442 }
 443
 444 /*
 445  * Set vnode attributes to VNOVAL
 446  */
 447 void
 448 vattr_null(vap)
 449         register struct vattr *vap;
 450 {
 451
 452         vap->va_type = VNON;
 453         vap->va_size = vap->va_bytes = VNOVAL;
 454         vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
 455                 vap->va_fsid = vap->va_fileid =
 456                 vap->va_blocksize = vap->va_rdev =
 457                 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
 458                 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
 459                 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
 460                 vap->va_flags = vap->va_gen = VNOVAL;
 461         vap->va_vaflags = 0;
 462 }
 463
 464 /*
 465  * Routines having to do with the management of the vnode table.
 466  */
 467 extern int (**dead_vnodeop_p)(void *);
 468 static void vclean __P((struct vnode *vp, int flag, struct proc *p));
 469 extern void vgonel __P((struct vnode *vp, struct proc *p));
 470 long numvnodes, freevnodes;
 471 long inactivevnodes;
 472 long vnode_reclaim_tried;
 473 long vnode_objects_reclaimed;
 474
 475
 476 extern struct vattr va_null;
 477
 478 /*
 479  * Return the next vnode from the free list.
 480  */
 481 int
 482 getnewvnode(tag, mp, vops, vpp)
 483         enum vtagtype tag;
 484         struct mount *mp;
 485         int (**vops)(void *);
 486         struct vnode **vpp;
 487 {
 488         struct proc *p = current_proc();        /* XXX */
 489         struct vnode *vp;
 490         int cnt, didretry = 0;
 491         static int reused = 0;                          /* track the reuse rate */
 492         int reclaimhits = 0;
 493
 494 retry:
 495         simple_lock(&vnode_free_list_slock);
 496         /*
 497          * MALLOC a vnode if the number of vnodes has not reached the desired
 498          * value and the number on the free list is still reasonable...
 499          * reuse from the freelist even though we may evict a name cache entry
 500          * to reduce the number of vnodes that accumulate.... vnodes tie up
 501          * wired memory and are never garbage collected
 502          */
 503         if (numvnodes < desiredvnodes && (freevnodes < (2 * VNODE_FREE_MIN))) {
 504                 numvnodes++;
 505                 simple_unlock(&vnode_free_list_slock);
 506                 MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
 507                 bzero((char *)vp, sizeof *vp);
 508                 VLISTNONE(vp);          /* avoid double queue removal */
 509                 simple_lock_init(&vp->v_interlock);
 510                 goto done;
 511         }
 512
 513         /*
 514          * Once the desired number of vnodes are allocated,
 515          * we start reusing the vnodes.
 516          */
 517         if (freevnodes < VNODE_FREE_MIN) {
 518                 /*
 519                  * if we are low on vnodes on the freelist attempt to get
 520                  * some back from the inactive list and VM object cache
 521                  */
 522                 simple_unlock(&vnode_free_list_slock);
 523                 (void)vnreclaim(vnodetarget);
 524                 simple_lock(&vnode_free_list_slock);
 525         }
 526         if (numvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED) {
 527                 reused = 0;
 528                 if (freevnodes < VNODE_FREE_ENOUGH) {
 529                         simple_unlock(&vnode_free_list_slock);
 530                         (void)vnreclaim(vnodetarget);
 531                         simple_lock(&vnode_free_list_slock);
 532                 }
 533         }
 534
 535         for (cnt = 0, vp = vnode_free_list.tqh_first;
 536                         vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
 537                 if (simple_lock_try(&vp->v_interlock)) {
 538                         /* got the interlock */
 539                         if (ISSET(vp->v_flag, VORECLAIM)) {
 540                                 /* skip over the vnodes that are being reclaimed */
 541                                 simple_unlock(&vp->v_interlock);
 542                                 reclaimhits++;
 543                         } else
 544                         break;
 545         }
 546         }
 547
 548         /*
 549          * Unless this is a bad time of the month, at most
 550          * the first NCPUS items on the free list are
 551          * locked, so this is close enough to being empty.
 552          */
 553         if (vp == NULLVP) {
 554                 simple_unlock(&vnode_free_list_slock);
 555                 if (!(didretry++) && (vnreclaim(vnodetarget) > 0))
 556                         goto retry;
 557                 tablefull("vnode");
 558                 log(LOG_EMERG, "%d vnodes locked, %d desired, %d numvnodes, "
 559                         "%d free, %d inactive, %d being reclaimed\n",
 560                         cnt, desiredvnodes, numvnodes, freevnodes, inactivevnodes,
 561                         reclaimhits);
 562                 *vpp = 0;
 563                 return (ENFILE);
 564         }
 565
 566         if (vp->v_usecount)
 567                 panic("free vnode isn't: v_type = %d, v_usecount = %d?",
 568                                 vp->v_type, vp->v_usecount);
 569
 570         VREMFREE("getnewvnode", vp);
 571         reused++;
 572         simple_unlock(&vnode_free_list_slock);
 573         vp->v_lease = NULL;
 574         cache_purge(vp);
 575         if (vp->v_type != VBAD)
 576                 vgonel(vp, p);  /* clean and reclaim the vnode */
 577         else
 578                 simple_unlock(&vp->v_interlock);
 579 #if DIAGNOSTIC
 580         if (vp->v_data)
 581                 panic("cleaned vnode isn't");
 582         {
 583         int s = splbio();
 584         if (vp->v_numoutput)
 585                 panic("Clean vnode has pending I/O's");
 586         splx(s);
 587         }
 588 #endif
 589         if (UBCINFOEXISTS(vp))
 590                 panic("getnewvnode: ubcinfo not cleaned");
 591         else
 592                 vp->v_ubcinfo = 0;
 593
 594         vp->v_lastr = -1;
 595         vp->v_ralen = 0;
 596         vp->v_maxra = 0;
 597         vp->v_lastw = 0;
 598         vp->v_ciosiz = 0;
 599         vp->v_cstart = 0;
 600         vp->v_clen = 0;
 601         vp->v_socket = 0;
 602
 603 done:
 604         vp->v_flag = VSTANDARD;
 605         vp->v_type = VNON;
 606         vp->v_tag = tag;
 607         vp->v_op = vops;
 608         insmntque(vp, mp);
 609         *vpp = vp;
 610         vp->v_usecount = 1;
 611         vp->v_data = 0;
 612         return (0);
 613 }
 614
 615 /*
 616  * Move a vnode from one mount queue to another.
 617  */
 618 void
 619 insmntque(vp, mp)
 620         struct vnode *vp;
 621         struct mount *mp;
 622 {
 623
 624         simple_lock(&mntvnode_slock);
 625         /*
 626          * Delete from old mount point vnode list, if on one.
 627          */
 628         if (vp->v_mount != NULL)
 629                 LIST_REMOVE(vp, v_mntvnodes);
 630         /*
 631          * Insert into list of vnodes for the new mount point, if available.
 632          */
 633         if ((vp->v_mount = mp) != NULL)
 634                 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
 635         simple_unlock(&mntvnode_slock);
 636 }
 637
 638 __inline void
 639 vpwakeup(struct vnode *vp)
 640 {
 641         if (vp) {
 642                 if (--vp->v_numoutput < 0)
 643                         panic("vpwakeup: neg numoutput");
 644                 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
 645                         if (vp->v_numoutput < 0)
 646                                 panic("vpwakeup: neg numoutput 2");
 647                         vp->v_flag &= ~VBWAIT;
 648                         wakeup((caddr_t)&vp->v_numoutput);
 649                 }
 650         }
 651 }
 652
 653 /*
 654  * Update outstanding I/O count and do wakeup if requested.
 655  */
 656 void
 657 vwakeup(bp)
 658         register struct buf *bp;
 659 {
 660         register struct vnode *vp;
 661
 662         CLR(bp->b_flags, B_WRITEINPROG);
 663         vpwakeup(bp->b_vp);
 664 }
 665
 666 /*
 667  * Flush out and invalidate all buffers associated with a vnode.
 668  * Called with the underlying object locked.
 669  */
 670 int
 671 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
 672         register struct vnode *vp;
 673         int flags;
 674         struct ucred *cred;
 675         struct proc *p;
 676         int slpflag, slptimeo;
 677 {
 678         register struct buf *bp;
 679         struct buf *nbp, *blist;
 680         int s, error = 0;
 681
 682         if (flags & V_SAVE) {
 683                 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
 684                         return (error);
 685                 }
 686                 if (vp->v_dirtyblkhd.lh_first != NULL || (vp->v_flag & VHASDIRTY))
 687                         panic("vinvalbuf: dirty bufs");
 688         }
 689
 690         for (;;) {
 691                 if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
 692                         while (blist && blist->b_lblkno < 0)
 693                                 blist = blist->b_vnbufs.le_next;
 694                 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
 695                     (flags & V_SAVEMETA))
 696                         while (blist && blist->b_lblkno < 0)
 697                                 blist = blist->b_vnbufs.le_next;
 698                 if (!blist)
 699                         break;
 700
 701                 for (bp = blist; bp; bp = nbp) {
 702                         nbp = bp->b_vnbufs.le_next;
 703                         if (flags & V_SAVEMETA && bp->b_lblkno < 0)
 704                                 continue;
 705                         s = splbio();
 706                         if (ISSET(bp->b_flags, B_BUSY)) {
 707                                 SET(bp->b_flags, B_WANTED);
 708                                 error = tsleep((caddr_t)bp,
 709                                         slpflag | (PRIBIO + 1), "vinvalbuf",
 710                                         slptimeo);
 711                                 splx(s);
 712                                 if (error) {
 713                                         return (error);
 714                                 }
 715                                 break;
 716                         }
 717                         bremfree(bp);
 718                         SET(bp->b_flags, B_BUSY);
 719                         splx(s);
 720                         /*
 721                          * XXX Since there are no node locks for NFS, I believe
 722                          * there is a slight chance that a delayed write will
 723                          * occur while sleeping just above, so check for it.
 724                          */
 725                         if (ISSET(bp->b_flags, B_DELWRI) && (flags & V_SAVE)) {
 726                                 (void) VOP_BWRITE(bp);
 727                                 break;
 728                         }
 729                         SET(bp->b_flags, B_INVAL);
 730                         brelse(bp);
 731                 }
 732         }
 733         if (!(flags & V_SAVEMETA) &&
 734             (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
 735                 panic("vinvalbuf: flush failed");
 736         return (0);
 737 }
 738
 739 /*
 740  * Associate a buffer with a vnode.
 741  */
 742 void
 743 bgetvp(vp, bp)
 744         register struct vnode *vp;
 745         register struct buf *bp;
 746 {
 747
 748         if (bp->b_vp)
 749                 panic("bgetvp: not free");
 750         VHOLD(vp);
 751         bp->b_vp = vp;
 752         if (vp->v_type == VBLK || vp->v_type == VCHR)
 753                 bp->b_dev = vp->v_rdev;
 754         else
 755                 bp->b_dev = NODEV;
 756         /*
 757          * Insert onto list for new vnode.
 758          */
 759         bufinsvn(bp, &vp->v_cleanblkhd);
 760 }
 761
 762 /*
 763  * Disassociate a buffer from a vnode.
 764  */
 765 void
 766 brelvp(bp)
 767         register struct buf *bp;
 768 {
 769         struct vnode *vp;
 770
 771         if (bp->b_vp == (struct vnode *) 0)
 772                 panic("brelvp: NULL");
 773         /*
 774          * Delete from old vnode list, if on one.
 775          */
 776         if (bp->b_vnbufs.le_next != NOLIST)
 777                 bufremvn(bp);
 778         vp = bp->b_vp;
 779         bp->b_vp = (struct vnode *) 0;
 780         HOLDRELE(vp);
 781 }
 782
 783 /*
 784  * Reassign a buffer from one vnode to another.
 785  * Used to assign file specific control information
 786  * (indirect blocks) to the vnode to which they belong.
 787  */
 788 void
 789 reassignbuf(bp, newvp)
 790         register struct buf *bp;
 791         register struct vnode *newvp;
 792 {
 793         register struct buflists *listheadp;
 794
 795         if (newvp == NULL) {
 796                 printf("reassignbuf: NULL");
 797                 return;
 798         }
 799         /*
 800          * Delete from old vnode list, if on one.
 801          */
 802         if (bp->b_vnbufs.le_next != NOLIST)
 803                 bufremvn(bp);
 804         /*
 805          * If dirty, put on list of dirty buffers;
 806          * otherwise insert onto list of clean buffers.
 807          */
 808         if (ISSET(bp->b_flags, B_DELWRI))
 809                 listheadp = &newvp->v_dirtyblkhd;
 810         else
 811                 listheadp = &newvp->v_cleanblkhd;
 812         bufinsvn(bp, listheadp);
 813 }
 814
 815 /*
 816  * Create a vnode for a block device.
 817  * Used for root filesystem, argdev, and swap areas.
 818  * Also used for memory file system special devices.
 819  */
 820 int
 821 bdevvp(dev, vpp)
 822         dev_t dev;
 823         struct vnode **vpp;
 824 {
 825         register struct vnode *vp;
 826         struct vnode *nvp;
 827         int error;
 828
 829         if (dev == NODEV) {
 830                 *vpp = NULLVP;
 831                 return (ENODEV);
 832         }
 833         error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
 834         if (error) {
 835                 *vpp = NULLVP;
 836                 return (error);
 837         }
 838         vp = nvp;
 839         vp->v_type = VBLK;
 840         if (nvp = checkalias(vp, dev, (struct mount *)0)) {
 841                 vput(vp);
 842                 vp = nvp;
 843         }
 844         *vpp = vp;
 845         return (0);
 846 }
 847
 848 /*
 849  * Check to see if the new vnode represents a special device
 850  * for which we already have a vnode (either because of
 851  * bdevvp() or because of a different vnode representing
 852  * the same block device). If such an alias exists, deallocate
 853  * the existing contents and return the aliased vnode. The
 854  * caller is responsible for filling it with its new contents.
 855  */
 856 struct vnode *
 857 checkalias(nvp, nvp_rdev, mp)
 858         register struct vnode *nvp;
 859         dev_t nvp_rdev;
 860         struct mount *mp;
 861 {
 862         struct proc *p = current_proc();        /* XXX */
 863         struct vnode *vp;
 864         struct vnode **vpp;
 865         struct specinfo * bufhold;
 866         int buffree = 1;
 867
 868         if (nvp->v_type != VBLK && nvp->v_type != VCHR)
 869                 return (NULLVP);
 870
 871         bufhold = (struct specinfo *)_MALLOC_ZONE(sizeof(struct specinfo),
 872                         M_VNODE, M_WAITOK);
 873         vpp = &speclisth[SPECHASH(nvp_rdev)];
 874 loop:
 875         simple_lock(&spechash_slock);
 876         for (vp = *vpp; vp; vp = vp->v_specnext) {
 877                 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
 878                         continue;
 879                 /*
 880                  * Alias, but not in use, so flush it out.
 881                  */
 882                 simple_lock(&vp->v_interlock);
 883                 if (vp->v_usecount == 0) {
 884                         simple_unlock(&spechash_slock);
 885                         vgonel(vp, p);
 886                         goto loop;
 887                 }
 888                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
 889                         simple_unlock(&spechash_slock);
 890                         goto loop;
 891                 }
 892                 break;
 893         }
 894         if (vp == NULL || vp->v_tag != VT_NON) {
 895                 nvp->v_specinfo = bufhold;
 896                 buffree = 0;    /* buffer used */
 897                 bzero(nvp->v_specinfo, sizeof(struct specinfo));
 898                 nvp->v_rdev = nvp_rdev;
 899                 nvp->v_hashchain = vpp;
 900                 nvp->v_specnext = *vpp;
 901                 nvp->v_specflags = 0;
 902                 simple_unlock(&spechash_slock);
 903                 *vpp = nvp;
 904                 if (vp != NULLVP) {
 905                         nvp->v_flag |= VALIASED;
 906                         vp->v_flag |= VALIASED;
 907                         vput(vp);
 908                 }
 909                 /* Since buffer is used just return */
 910                 return (NULLVP);
 911         }
 912         simple_unlock(&spechash_slock);
 913         VOP_UNLOCK(vp, 0, p);
 914         simple_lock(&vp->v_interlock);
 915         vclean(vp, 0, p);
 916         vp->v_op = nvp->v_op;
 917         vp->v_tag = nvp->v_tag;
 918         nvp->v_type = VNON;
 919         insmntque(vp, mp);
 920         if (buffree)
 921                 _FREE_ZONE((void *)bufhold, sizeof (struct specinfo), M_VNODE);
 922         return (vp);
 923 }
 924
 925 /*
 926  * Get a reference on a particular vnode and lock it if requested.
 927  * If the vnode was on the inactive list, remove it from the list.
 928  * If the vnode was on the free list, remove it from the list and
 929  * move it to inactive list as needed.
 930  * The vnode lock bit is set if the vnode is being eliminated in
 931  * vgone. The process is awakened when the transition is completed,
 932  * and an error returned to indicate that the vnode is no longer
 933  * usable (possibly having been changed to a new file system type).
 934  */
 935 int
 936 vget(vp, flags, p)
 937         struct vnode *vp;
 938         int flags;
 939         struct proc *p;
 940 {
 941         int error = 0;
 942
 943         /*
 944          * If the vnode is in the process of being cleaned out for
 945          * another use, we wait for the cleaning to finish and then
 946          * return failure. Cleaning is determined by checking that
 947          * the VXLOCK flag is set.
 948          */
 949         if ((flags & LK_INTERLOCK) == 0)
 950                 simple_lock(&vp->v_interlock);
 951         if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
 952                 vp->v_flag |= VXWANT;
 953                 simple_unlock(&vp->v_interlock);
 954                 (void)tsleep((caddr_t)vp, PINOD, "vget", 0);
 955                 return (ENOENT);
 956         }
 957
 958         /*
 959          * vnode is being terminated.
 960          * wait for vnode_pager_no_senders() to clear VTERMINATE
 961          */
 962         if (ISSET(vp->v_flag, VTERMINATE)) {
 963                 SET(vp->v_flag, VTERMWANT);
 964                 simple_unlock(&vp->v_interlock);
 965                 (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
 966                 return (ENOENT);
 967         }
 968
 969         simple_lock(&vnode_free_list_slock);
 970         if (vp->v_usecount == 0) {
 971                 /* If on the free list, remove it from there */
 972                 if (VONLIST(vp))
 973                         VREMFREE("vget", vp);
 974         } else {
 975                 /* If on the inactive list, remove it from there */
 976                 if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
 977                         if (VONLIST(vp))
 978                                 VREMINACTIVE("vget", vp);
 979                 }
 980         }
 981
 982         /* The vnode should not be on the inactive list here */
 983         VINACTIVECHECK("vget", vp, 0);
 984
 985         simple_unlock(&vnode_free_list_slock);
 986
 987         if (++vp->v_usecount <= 0)
 988                 panic("vget: v_usecount");
 989
 990         /*
 991          * Recover named reference as needed
 992          */
 993         if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) {
 994                 simple_unlock(&vp->v_interlock);
 995                 if (ubc_getobject(vp, UBC_HOLDOBJECT)) {
 996                         error = ENOENT;
 997                         goto errout;
 998                 }
 999                 simple_lock(&vp->v_interlock);
1000         }
1001
1002         if (flags & LK_TYPE_MASK) {
1003                 if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
1004                         goto errout;
1005                 return (0);
1006         }
1007
1008         if ((flags & LK_INTERLOCK) == 0)
1009                 simple_unlock(&vp->v_interlock);
1010         return (0);
1011
1012 errout:
1013         /*
1014          * If the vnode was not active in the first place
1015          * must not call vrele() as VOP_INACTIVE() is not
1016          * required.
1017          * So inlined part of vrele() here.
1018          */
1019         simple_lock(&vp->v_interlock);
1020         if (--vp->v_usecount == 1) {
1021                 if (UBCINFOEXISTS(vp)) {
1022                         vinactive(vp);
1023                         simple_unlock(&vp->v_interlock);
1024                         return (error);
1025                 }
1026         }
1027         if (vp->v_usecount > 0) {
1028                 simple_unlock(&vp->v_interlock);
1029                 return (error);
1030         }
1031         if (vp->v_usecount < 0)
1032                 panic("vget: negative usecount (%d)", vp->v_usecount);
1033         vfree(vp);
1034         simple_unlock(&vp->v_interlock);
1035         return (error);
1036 }
1037
1038 /*
1039  * Get a pager reference on the particular vnode.
1040  *
1041  * This is called from ubc_info_init() and it is asumed that
1042  * the vnode is neither on the free list on on the inactive list.
1043  * It is also assumed that the vnode is neither being recycled
1044  * by vgonel nor being terminated by vnode_pager_vrele().
1045  *
1046  * The vnode interlock is NOT held by the caller.
1047  */
1048 __private_extern__ int
1049 vnode_pager_vget(vp)
1050         struct vnode *vp;
1051 {
1052         simple_lock(&vp->v_interlock);
1053         if (UBCINFOMISSING(vp))
1054                 panic("vnode_pager_vget: stolen ubc_info");
1055
1056         if (!UBCINFOEXISTS(vp))
1057                 panic("vnode_pager_vget: lost ubc_info");
1058
1059         if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM))
1060                 panic("vnode_pager_vget: already being reclaimd");
1061
1062         if (ISSET(vp->v_flag, VTERMINATE))
1063                 panic("vnode_pager_vget: already being terminated");
1064
1065         simple_lock(&vnode_free_list_slock);
1066         /* The vnode should not be on ANY list */
1067         if (VONLIST(vp))
1068                 panic("vnode_pager_vget: still on the list");
1069
1070         /* The vnode should not be on the inactive list here */
1071         VINACTIVECHECK("vnode_pager_vget", vp, 0);
1072         simple_unlock(&vnode_free_list_slock);
1073
1074         /* After all those checks, now do the real work :-) */
1075         if (++vp->v_usecount <= 0)
1076                 panic("vnode_pager_vget: v_usecount");
1077         simple_unlock(&vp->v_interlock);
1078
1079         return (0);
1080 }
1081
1082 /*
1083  * Stubs to use when there is no locking to be done on the underlying object.
1084  * A minimal shared lock is necessary to ensure that the underlying object
1085  * is not revoked while an operation is in progress. So, an active shared
1086  * count is maintained in an auxillary vnode lock structure.
1087  */
1088 int
1089 vop_nolock(ap)
1090         struct vop_lock_args /* {
1091                 struct vnode *a_vp;
1092                 int a_flags;
1093                 struct proc *a_p;
1094         } */ *ap;
1095 {
1096 #ifdef notyet
1097         /*
1098          * This code cannot be used until all the non-locking filesystems
1099          * (notably NFS) are converted to properly lock and release nodes.
1100          * Also, certain vnode operations change the locking state within
1101          * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
1102          * and symlink). Ideally these operations should not change the
1103          * lock state, but should be changed to let the caller of the
1104          * function unlock them. Otherwise all intermediate vnode layers
1105          * (such as union, umapfs, etc) must catch these functions to do
1106          * the necessary locking at their layer. Note that the inactive
1107          * and lookup operations also change their lock state, but this
1108          * cannot be avoided, so these two operations will always need
1109          * to be handled in intermediate layers.
1110          */
1111         struct vnode *vp = ap->a_vp;
1112         int vnflags, flags = ap->a_flags;
1113
1114         if (vp->v_vnlock == NULL) {
1115                 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1116                         return (0);
1117                 MALLOC_ZONE(vp->v_vnlock, struct lock__bsd__ *,
1118                                 sizeof(struct lock__bsd__), M_VNODE, M_WAITOK);
1119                 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1120         }
1121         switch (flags & LK_TYPE_MASK) {
1122         case LK_DRAIN:
1123                 vnflags = LK_DRAIN;
1124                 break;
1125         case LK_EXCLUSIVE:
1126         case LK_SHARED:
1127                 vnflags = LK_SHARED;
1128                 break;
1129         case LK_UPGRADE:
1130         case LK_EXCLUPGRADE:
1131         case LK_DOWNGRADE:
1132                 return (0);
1133         case LK_RELEASE:
1134         default:
1135                 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
1136         }
1137         if (flags & LK_INTERLOCK)
1138                 vnflags |= LK_INTERLOCK;
1139         return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
1140 #else /* for now */
1141         /*
1142          * Since we are not using the lock manager, we must clear
1143          * the interlock here.
1144          */
1145         if (ap->a_flags & LK_INTERLOCK)
1146                 simple_unlock(&ap->a_vp->v_interlock);
1147         return (0);
1148 #endif
1149 }
1150
1151 /*
1152  * Decrement the active use count.
1153  */
1154 int
1155 vop_nounlock(ap)
1156         struct vop_unlock_args /* {
1157                 struct vnode *a_vp;
1158                 int a_flags;
1159                 struct proc *a_p;
1160         } */ *ap;
1161 {
1162         struct vnode *vp = ap->a_vp;
1163
1164         if (vp->v_vnlock == NULL)
1165                 return (0);
1166         return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
1167 }
1168
1169 /*
1170  * Return whether or not the node is in use.
1171  */
1172 int
1173 vop_noislocked(ap)
1174         struct vop_islocked_args /* {
1175                 struct vnode *a_vp;
1176         } */ *ap;
1177 {
1178         struct vnode *vp = ap->a_vp;
1179
1180         if (vp->v_vnlock == NULL)
1181                 return (0);
1182         return (lockstatus(vp->v_vnlock));
1183 }
1184
1185 /*
1186  * Vnode reference.
1187  */
1188 void
1189 vref(vp)
1190         struct vnode *vp;
1191 {
1192
1193         simple_lock(&vp->v_interlock);
1194         if (vp->v_usecount <= 0)
1195                 panic("vref used where vget required");
1196
1197         /* If on the inactive list, remove it from there */
1198         if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
1199                 if (VONLIST(vp)) {
1200                         simple_lock(&vnode_free_list_slock);
1201                         VREMINACTIVE("vref", vp);
1202                         simple_unlock(&vnode_free_list_slock);
1203                 }
1204         }
1205         /* The vnode should not be on the inactive list here */
1206         VINACTIVECHECK("vref", vp, 0);
1207
1208         if (++vp->v_usecount <= 0)
1209                 panic("vref v_usecount");
1210         simple_unlock(&vp->v_interlock);
1211 }
1212
1213 /*
1214  * put the vnode on appropriate free list.
1215  * called with v_interlock held.
1216  */
1217 static void
1218 vfree(vp)
1219         struct vnode *vp;
1220 {
1221         /*
1222          * if the vnode is not obtained by calling getnewvnode() we
1223          * are not responsible for the cleanup. Just return.
1224          */
1225         if (!(vp->v_flag & VSTANDARD)) {
1226                 return;
1227         }
1228
1229         if (vp->v_usecount != 0)
1230                 panic("vfree: v_usecount");
1231
1232         /* insert at tail of LRU list or at head if VAGE is set */
1233         simple_lock(&vnode_free_list_slock);
1234
1235         if (VONLIST(vp))
1236                  panic("vfree: vnode still on list");
1237
1238         if (vp->v_flag & VAGE) {
1239                 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1240                 vp->v_flag &= ~VAGE;
1241         } else
1242                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1243         freevnodes++;
1244         simple_unlock(&vnode_free_list_slock);
1245         return;
1246 }
1247
1248 /*
1249  * put the vnode on the inactive list.
1250  * called with v_interlock held
1251  */
1252 static void
1253 vinactive(vp)
1254         struct vnode *vp;
1255 {
1256         if (!UBCINFOEXISTS(vp))
1257                 panic("vinactive: not a UBC vnode");
1258
1259         if (vp->v_usecount != 1)
1260                 panic("vinactive: v_usecount");
1261
1262         simple_lock(&vnode_free_list_slock);
1263
1264         if (VONLIST(vp))
1265                  panic("vinactive: vnode still on list");
1266         VINACTIVECHECK("vinactive", vp, 0);
1267
1268         TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
1269         SET(vp->v_flag, VUINACTIVE);
1270         CLR(vp->v_flag, (VNOCACHE_DATA | VRAOFF));
1271
1272         inactivevnodes++;
1273         simple_unlock(&vnode_free_list_slock);
1274         return;
1275 }
1276
1277
1278 /*
1279  * vput(), just unlock and vrele()
1280  */
1281 void
1282 vput(vp)
1283         struct vnode *vp;
1284 {
1285         struct proc *p = current_proc();        /* XXX */
1286
1287         simple_lock(&vp->v_interlock);
1288         if (--vp->v_usecount == 1) {
1289                 if (UBCINFOEXISTS(vp)) {
1290                         vinactive(vp);
1291                         simple_unlock(&vp->v_interlock);
1292                         VOP_UNLOCK(vp, 0, p);
1293                         return;
1294                 }
1295         }
1296         if (vp->v_usecount > 0) {
1297                 simple_unlock(&vp->v_interlock);
1298                 VOP_UNLOCK(vp, 0, p);
1299                 return;
1300         }
1301 #if DIAGNOSTIC
1302         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1303                 vprint("vput: bad ref count", vp);
1304                 panic("vput: v_usecount = %d, v_writecount = %d",
1305                         vp->v_usecount, vp->v_writecount);
1306         }
1307 #endif
1308         if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1309                 VREMINACTIVE("vrele", vp);
1310
1311         simple_unlock(&vp->v_interlock);
1312         VOP_INACTIVE(vp, p);
1313         /*
1314          * The interlock is not held and
1315          * VOP_INCATIVE releases the vnode lock.
1316          * We could block and the vnode might get reactivated
1317          * Can not just call vfree without checking the state
1318          */
1319         simple_lock(&vp->v_interlock);
1320         if (!VONLIST(vp)) {
1321                 if (vp->v_usecount == 0)
1322                         vfree(vp);
1323                 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1324                         vinactive(vp);
1325         }
1326         simple_unlock(&vp->v_interlock);
1327 }
1328
1329 /*
1330  * Vnode release.
1331  * If count drops to zero, call inactive routine and return to freelist.
1332  */
1333 void
1334 vrele(vp)
1335         struct vnode *vp;
1336 {
1337         struct proc *p = current_proc();        /* XXX */
1338
1339         simple_lock(&vp->v_interlock);
1340         if (--vp->v_usecount == 1) {
1341                 if (UBCINFOEXISTS(vp)) {
1342                         vinactive(vp);
1343                         simple_unlock(&vp->v_interlock);
1344                         return;
1345                 }
1346         }
1347         if (vp->v_usecount > 0) {
1348                 simple_unlock(&vp->v_interlock);
1349                 return;
1350         }
1351 #if DIAGNOSTIC
1352         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1353                 vprint("vrele: bad ref count", vp);
1354                 panic("vrele: ref cnt");
1355         }
1356 #endif
1357         if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1358                 VREMINACTIVE("vrele", vp);
1359
1360
1361         if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
1362                 /* vnode is being cleaned, just return */
1363                 vfree(vp);
1364                 simple_unlock(&vp->v_interlock);
1365                 return;
1366         }
1367
1368         if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1369                 VOP_INACTIVE(vp, p);
1370                 /*
1371                  * vn_lock releases the interlock and
1372                  * VOP_INCATIVE releases the vnode lock.
1373                  * We could block and the vnode might get reactivated
1374                  * Can not just call vfree without checking the state
1375                  */
1376                 simple_lock(&vp->v_interlock);
1377                 if (!VONLIST(vp)) {
1378                         if (vp->v_usecount == 0)
1379                                 vfree(vp);
1380                         else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1381                                 vinactive(vp);
1382                 }
1383                 simple_unlock(&vp->v_interlock);
1384         }
1385 #if 0
1386         else {
1387                 vfree(vp);
1388                 simple_unlock(&vp->v_interlock);
1389                 kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp);
1390         }
1391 #endif
1392 }
1393
1394 void
1395 vagevp(vp)
1396         struct vnode *vp;
1397 {
1398         simple_lock(&vp->v_interlock);
1399         vp->v_flag |= VAGE;
1400         simple_unlock(&vp->v_interlock);
1401         return;
1402 }
1403
1404 /*
1405  * Page or buffer structure gets a reference.
1406  */
1407 void
1408 vhold(vp)
1409         register struct vnode *vp;
1410 {
1411
1412         simple_lock(&vp->v_interlock);
1413         vp->v_holdcnt++;
1414         simple_unlock(&vp->v_interlock);
1415 }
1416
1417 /*
1418  * Page or buffer structure frees a reference.
1419  */
1420 void
1421 holdrele(vp)
1422         register struct vnode *vp;
1423 {
1424
1425         simple_lock(&vp->v_interlock);
1426         if (vp->v_holdcnt <= 0)
1427                 panic("holdrele: holdcnt");
1428         vp->v_holdcnt--;
1429         simple_unlock(&vp->v_interlock);
1430 }
1431
1432 /*
1433  * Remove any vnodes in the vnode table belonging to mount point mp.
1434  *
1435  * If MNT_NOFORCE is specified, there should not be any active ones,
1436  * return error if any are found (nb: this is a user error, not a
1437  * system error). If MNT_FORCE is specified, detach any active vnodes
1438  * that are found.
1439  */
1440 #if DIAGNOSTIC
1441 int busyprt = 0;        /* print out busy vnodes */
1442 #if 0
1443 struct ctldebug debug1 = { "busyprt", &busyprt };
1444 #endif /* 0 */
1445 #endif
1446
1447 int
1448 vflush(mp, skipvp, flags)
1449         struct mount *mp;
1450         struct vnode *skipvp;
1451         int flags;
1452 {
1453         struct proc *p = current_proc();
1454         struct vnode *vp, *nvp;
1455         int busy = 0;
1456
1457         simple_lock(&mntvnode_slock);
1458 loop:
1459         for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1460                 if (vp->v_mount != mp)
1461                         goto loop;
1462                 nvp = vp->v_mntvnodes.le_next;
1463                 /*
1464                  * Skip over a selected vnode.
1465                  */
1466                 if (vp == skipvp)
1467                         continue;
1468
1469                 simple_lock(&vp->v_interlock);
1470                 /*
1471                  * Skip over a vnodes marked VSYSTEM.
1472                  */
1473                 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1474                         simple_unlock(&vp->v_interlock);
1475                         continue;
1476                 }
1477                 /*
1478                  * Skip over a vnodes marked VSWAP.
1479                  */
1480                 if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
1481                         simple_unlock(&vp->v_interlock);
1482                         continue;
1483                 }
1484                 /*
1485                  * If WRITECLOSE is set, only flush out regular file
1486                  * vnodes open for writing.
1487                  */
1488                 if ((flags & WRITECLOSE) &&
1489                     (vp->v_writecount == 0 || vp->v_type != VREG)) {
1490                         simple_unlock(&vp->v_interlock);
1491                         continue;
1492                 }
1493                 /*
1494                  * With v_usecount == 0, all we need to do is clear
1495                  * out the vnode data structures and we are done.
1496                  */
1497                 if (vp->v_usecount == 0) {
1498                         simple_unlock(&mntvnode_slock);
1499                         vgonel(vp, p);
1500                         simple_lock(&mntvnode_slock);
1501                         continue;
1502                 }
1503                 /*
1504                  * If FORCECLOSE is set, forcibly close the vnode.
1505                  * For block or character devices, revert to an
1506                  * anonymous device. For all other files, just kill them.
1507                  */
1508                 if (flags & FORCECLOSE) {
1509                         simple_unlock(&mntvnode_slock);
1510                         if (vp->v_type != VBLK && vp->v_type != VCHR) {
1511                                 vgonel(vp, p);
1512                         } else {
1513                                 vclean(vp, 0, p);
1514                                 vp->v_op = spec_vnodeop_p;
1515                                 insmntque(vp, (struct mount *)0);
1516                         }
1517                         simple_lock(&mntvnode_slock);
1518                         continue;
1519                 }
1520 #if DIAGNOSTIC
1521                 if (busyprt)
1522                         vprint("vflush: busy vnode", vp);
1523 #endif
1524                 simple_unlock(&vp->v_interlock);
1525                 busy++;
1526         }
1527         simple_unlock(&mntvnode_slock);
1528         if (busy)
1529                 return (EBUSY);
1530         return (0);
1531 }
1532
1533 /*
1534  * Disassociate the underlying file system from a vnode.
1535  * The vnode interlock is held on entry.
1536  */
1537 static void
1538 vclean(vp, flags, p)
1539         struct vnode *vp;
1540         int flags;
1541         struct proc *p;
1542 {
1543         int active;
1544         void *obj;
1545         kern_return_t kret;
1546         int removed = 0;
1547         int didhold;
1548
1549         /*
1550          * if the vnode is not obtained by calling getnewvnode() we
1551          * are not responsible for the cleanup. Just return.
1552          */
1553         if (!(vp->v_flag & VSTANDARD)) {
1554                 simple_unlock(&vp->v_interlock);
1555                 return;
1556         }
1557
1558         /*
1559          * Check to see if the vnode is in use.
1560          * If so we have to reference it before we clean it out
1561          * so that its count cannot fall to zero and generate a
1562          * race against ourselves to recycle it.
1563          */
1564         if (active = vp->v_usecount)
1565                 if (++vp->v_usecount <= 0)
1566                         panic("vclean: v_usecount");
1567         /*
1568          * Prevent the vnode from being recycled or
1569          * brought into use while we clean it out.
1570          */
1571         if (vp->v_flag & VXLOCK)
1572                 panic("vclean: deadlock");
1573         vp->v_flag |= VXLOCK;
1574
1575         /*
1576          * Even if the count is zero, the VOP_INACTIVE routine may still
1577          * have the object locked while it cleans it out. The VOP_LOCK
1578          * ensures that the VOP_INACTIVE routine is done with its work.
1579          * For active vnodes, it ensures that no other activity can
1580          * occur while the underlying object is being cleaned out.
1581          */
1582         VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1583
1584         /*
1585          * if this vnode is on the inactive list
1586          * take it off the list.
1587          */
1588         if ((active == 1) &&
1589                 (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))) {
1590                 simple_lock(&vnode_free_list_slock);
1591                 VREMINACTIVE("vclean", vp);
1592                 simple_unlock(&vnode_free_list_slock);
1593                 removed++;
1594         }
1595
1596         /* Clean the pages in VM. */
1597         if (active && (flags & DOCLOSE))
1598                 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1599
1600         /* Clean the pages in VM. */
1601         didhold = ubc_hold(vp);
1602         if ((active) && (didhold))
1603                 (void)ubc_clean(vp, 0); /* do not invalidate */
1604
1605         /*
1606          * Clean out any buffers associated with the vnode.
1607          */
1608         if (flags & DOCLOSE) {
1609                 if (vp->v_tag == VT_NFS)
1610             nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
1611         else
1612             vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1613     }
1614
1615         if (active)
1616                 VOP_INACTIVE(vp, p);
1617         else
1618                 VOP_UNLOCK(vp, 0, p);
1619
1620         /* Destroy ubc named reference */
1621     if (didhold) {
1622         ubc_rele(vp);
1623                 ubc_destroy_named(vp);
1624         }
1625
1626         /*
1627          * Reclaim the vnode.
1628          */
1629         if (VOP_RECLAIM(vp, p))
1630                 panic("vclean: cannot reclaim");
1631         cache_purge(vp);
1632         if (vp->v_vnlock) {
1633                 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1634                         vprint("vclean: lock not drained", vp);
1635                 FREE_ZONE(vp->v_vnlock, sizeof (struct lock__bsd__), M_VNODE);
1636                 vp->v_vnlock = NULL;
1637         }
1638
1639         /* It's dead, Jim! */
1640         vp->v_op = dead_vnodeop_p;
1641         vp->v_tag = VT_NON;
1642
1643         /*
1644          * Done with purge, notify sleepers of the grim news.
1645          */
1646         vp->v_flag &= ~VXLOCK;
1647         if (vp->v_flag & VXWANT) {
1648                 vp->v_flag &= ~VXWANT;
1649                 wakeup((caddr_t)vp);
1650         }
1651
1652         if (active)
1653                 vrele(vp);
1654 }
1655
1656 /*
1657  * Eliminate all activity associated with  the requested vnode
1658  * and with all vnodes aliased to the requested vnode.
1659  */
1660 int
1661 vop_revoke(ap)
1662         struct vop_revoke_args /* {
1663                 struct vnode *a_vp;
1664                 int a_flags;
1665         } */ *ap;
1666 {
1667         struct vnode *vp, *vq;
1668         struct proc *p = current_proc();
1669
1670 #if DIAGNOSTIC
1671         if ((ap->a_flags & REVOKEALL) == 0)
1672                 panic("vop_revoke");
1673 #endif
1674
1675         vp = ap->a_vp;
1676         simple_lock(&vp->v_interlock);
1677
1678         if (vp->v_flag & VALIASED) {
1679                 /*
1680                  * If a vgone (or vclean) is already in progress,
1681                  * wait until it is done and return.
1682                  */
1683                 if (vp->v_flag & VXLOCK) {
1684                         while (vp->v_flag & VXLOCK) {
1685                                 vp->v_flag |= VXWANT;
1686                                 simple_unlock(&vp->v_interlock);
1687                                 (void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1688                         }
1689                         return (0);
1690                 }
1691                 /*
1692                  * Ensure that vp will not be vgone'd while we
1693                  * are eliminating its aliases.
1694                  */
1695                 vp->v_flag |= VXLOCK;
1696                 simple_unlock(&vp->v_interlock);
1697                 while (vp->v_flag & VALIASED) {
1698                         simple_lock(&spechash_slock);
1699                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1700                                 if (vq->v_rdev != vp->v_rdev ||
1701                                     vq->v_type != vp->v_type || vp == vq)
1702                                         continue;
1703                                 simple_unlock(&spechash_slock);
1704                                 vgone(vq);
1705                                 break;
1706                         }
1707                         if (vq == NULLVP)
1708                                 simple_unlock(&spechash_slock);
1709                 }
1710                 /*
1711                  * Remove the lock so that vgone below will
1712                  * really eliminate the vnode after which time
1713                  * vgone will awaken any sleepers.
1714                  */
1715                 simple_lock(&vp->v_interlock);
1716                 vp->v_flag &= ~VXLOCK;
1717         }
1718         vgonel(vp, p);
1719         return (0);
1720 }
1721
1722 /*
1723  * Recycle an unused vnode to the front of the free list.
1724  * Release the passed interlock if the vnode will be recycled.
1725  */
1726 int
1727 vrecycle(vp, inter_lkp, p)
1728         struct vnode *vp;
1729         struct slock *inter_lkp;
1730         struct proc *p;
1731 {
1732
1733         simple_lock(&vp->v_interlock);
1734         if (vp->v_usecount == 0) {
1735                 if (inter_lkp)
1736                         simple_unlock(inter_lkp);
1737                 vgonel(vp, p);
1738                 return (1);
1739         }
1740         simple_unlock(&vp->v_interlock);
1741         return (0);
1742 }
1743
1744 /*
1745  * Eliminate all activity associated with a vnode
1746  * in preparation for reuse.
1747  */
1748 void
1749 vgone(vp)
1750         struct vnode *vp;
1751 {
1752         struct proc *p = current_proc();
1753
1754         simple_lock(&vp->v_interlock);
1755         vgonel(vp, p);
1756 }
1757
1758 /*
1759  * vgone, with the vp interlock held.
      *
      * Runs vclean() on the vnode, takes it off its mount's vnode list,
      * removes block/character device vnodes from the special-device
      * hash chain (fixing up VALIASED on whatever alias remains), moves
      * an unreferenced vnode to the head of the free list and finally
      * sets its type to VBAD.
      *
      * vp: vnode to eliminate; the caller holds vp->v_interlock on entry.
      * p:  process handed through to vclean().
      *
      * Vnodes without VSTANDARD set are left alone (the interlock is just
      * released).  If VXLOCK is already set, this sleeps until it clears
      * and returns without doing any of the work above.
1760  */
1761 void
1762 vgonel(vp, p)
1763         struct vnode *vp;
1764         struct proc *p;
1765 {
1766         struct vnode *vq;
1767         struct vnode *vx;
1768
1769         /*
1770          * if the vnode is not obtained by calling getnewvnode() we
1771          * are not responsible for the cleanup. Just return.
1772          */
1773         if (!(vp->v_flag & VSTANDARD)) {
1774                 simple_unlock(&vp->v_interlock);
1775                 return;
1776         }
1777
1778         /*
1779          * If a vgone (or vclean) is already in progress,
1780          * wait until it is done and return.
              *
              * VXWANT is set before each sleep.  The interlock is released
              * before sleeping and is not retaken in this loop, so this
              * path returns without holding it.
1781          */
1782         if (vp->v_flag & VXLOCK) {
1783                 while (vp->v_flag & VXLOCK) {
1784                         vp->v_flag |= VXWANT;
1785                         simple_unlock(&vp->v_interlock);
1786                         (void)tsleep((caddr_t)vp, PINOD, "vgone", 0);
1787                 }
1788                 return;
1789         }
1790         /*
1791          * Clean out the filesystem specific data.
1792          */
1793         vclean(vp, DOCLOSE, p);
1794         /*
1795          * Delete from old mount point vnode list, if on one.
1796          */
1797         if (vp->v_mount != NULL)
1798                 insmntque(vp, (struct mount *)0);
1799         /*
1800          * If special device, remove it from special device alias list
1801          * if it is on one.
1802          */
1803         if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1804                 simple_lock(&spechash_slock);
                      /*
                       * Unlink vp from its hash chain: either it is the
                       * chain head, or its predecessor must be found by
                       * walking the chain.  Not finding it is fatal.
                       */
1805                 if (*vp->v_hashchain == vp) {
1806                         *vp->v_hashchain = vp->v_specnext;
1807                 } else {
1808                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1809                                 if (vq->v_specnext != vp)
1810                                         continue;
1811                                 vq->v_specnext = vp->v_specnext;
1812                                 break;
1813                         }
1814                         if (vq == NULL)
1815                                 panic("missing bdev");
1816                 }
1817                 if (vp->v_flag & VALIASED) {
                              /*
                               * Look for the vnodes still on the chain with
                               * the same device and type.  vx is the first
                               * match; the loop stops early (vq != NULL) only
                               * when a second match exists.  If exactly one
                               * remains, it is no longer aliased.
                               */
1818                         vx = NULL;
1819                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1820                                 if (vq->v_rdev != vp->v_rdev ||
1821                                     vq->v_type != vp->v_type)
1822                                         continue;
1823                                 if (vx)
1824                                         break;
1825                                 vx = vq;
1826                         }
1827                         if (vx == NULL)
1828                                 panic("missing alias");
1829                         if (vq == NULL)
1830                                 vx->v_flag &= ~VALIASED;
1831                         vp->v_flag &= ~VALIASED;
1832                 }
1833                 simple_unlock(&spechash_slock);
                      /* Release the specinfo and clear the pointer to it. */
1834                 FREE_ZONE(vp->v_specinfo, sizeof (struct specinfo), M_VNODE);
1835                 vp->v_specinfo = NULL;
1836         }
1837         /*
1838          * If it is on the freelist and not already at the head,
1839          * move it to the head of the list. The test of the back
1840          * pointer and the reference count of zero is because
1841          * it will be removed from the free list by getnewvnode,
1842          * but will not have its reference count incremented until
1843          * after calling vgone. If the reference count were
1844          * incremented first, vgone would (incorrectly) try to
1845          * close the previous instance of the underlying object.
1846          * So, the back pointer is explicitly set to `0xdeadb' in
1847          * getnewvnode after removing it from the freelist to ensure
1848          * that we do not try to move it here.
1849          */
1850         if (vp->v_usecount == 0) {
1851                 simple_lock(&vnode_free_list_slock);
1852                 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1853                     vnode_free_list.tqh_first != vp) {
1854                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1855                         TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1856                 }
1857                 simple_unlock(&vnode_free_list_slock);
1858         }
1859         vp->v_type = VBAD;
1860 }
1861
1862 /*
1863  * Lookup a vnode by device number.
1864  */
1865 int
1866 vfinddev(dev, type, vpp)
1867         dev_t dev;
1868         enum vtype type;
1869         struct vnode **vpp;
1870 {
1871         struct vnode *vp;
1872         int rc = 0;
1873
1874         simple_lock(&spechash_slock);
1875         for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1876                 if (dev != vp->v_rdev || type != vp->v_type)
1877                         continue;
1878                 *vpp = vp;
1879                 rc = 1;
1880                 break;
1881         }
1882         simple_unlock(&spechash_slock);
1883         return (rc);
1884 }
1885
1886 /*
1887  * Calculate the total number of references to a special device.
1888  */
1889 int
1890 vcount(vp)
1891         struct vnode *vp;
1892 {
1893         struct vnode *vq, *vnext;
1894         int count;
1895
1896 loop:
1897         if ((vp->v_flag & VALIASED) == 0)
1898                 return (vp->v_usecount);
1899         simple_lock(&spechash_slock);
1900         for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1901                 vnext = vq->v_specnext;
1902                 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1903                         continue;
1904                 /*
1905                  * Alias, but not in use, so flush it out.
1906                  */
1907                 if (vq->v_usecount == 0 && vq != vp) {
1908                         simple_unlock(&spechash_slock);
1909                         vgone(vq);
1910                         goto loop;
1911                 }
1912                 count += vq->v_usecount;
1913         }
1914         simple_unlock(&spechash_slock);
1915         return (count);
1916 }
1917
1918 int     prtactive = 0;          /* 1 => print out reclaim of active vnodes */
1919
1920 /*
1921  * Print out a description of a vnode.
1922  */
1923 static char *typename[] =
1924    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1925
1926 void
1927 vprint(label, vp)
1928         char *label;
1929         register struct vnode *vp;
1930 {
1931         char buf[64];
1932
1933         if (label != NULL)
1934                 printf("%s: ", label);
1935         printf("type %s, usecount %d, writecount %d, refcount %d,",
1936                 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1937                 vp->v_holdcnt);
1938         buf[0] = '\0';
1939         if (vp->v_flag & VROOT)
1940                 strcat(buf, "|VROOT");
1941         if (vp->v_flag & VTEXT)
1942                 strcat(buf, "|VTEXT");
1943         if (vp->v_flag & VSYSTEM)
1944                 strcat(buf, "|VSYSTEM");
1945         if (vp->v_flag & VXLOCK)
1946                 strcat(buf, "|VXLOCK");
1947         if (vp->v_flag & VXWANT)
1948                 strcat(buf, "|VXWANT");
1949         if (vp->v_flag & VBWAIT)
1950                 strcat(buf, "|VBWAIT");
1951         if (vp->v_flag & VALIASED)
1952                 strcat(buf, "|VALIASED");
1953         if (buf[0] != '\0')
1954                 printf(" flags (%s)", &buf[1]);
1955         if (vp->v_data == NULL) {
1956                 printf("\n");
1957         } else {
1958                 printf("\n\t");
1959                 VOP_PRINT(vp);
1960         }
1961 }
1962
#ifdef DEBUG
/*
 * printlockedvnodes: debugging aid that walks the mount list and calls
 * vprint() on every vnode for which VOP_ISLOCKED() is true.
 *
 * A mount that vfs_busy() cannot busy with LK_NOWAIT is skipped.
 */
void
printlockedvnodes()
{
        struct proc *curp = current_proc();
        struct mount *mnt, *next_mnt;
        struct vnode *node;

        printf("Locked vnodes\n");
        simple_lock(&mountlist_slock);
        mnt = mountlist.cqh_first;
        while (mnt != (void *)&mountlist) {
                if (vfs_busy(mnt, LK_NOWAIT, &mountlist_slock, curp) == 0) {
                        for (node = mnt->mnt_vnodelist.lh_first;
                             node != NULL;
                             node = node->v_mntvnodes.le_next) {
                                if (VOP_ISLOCKED(node))
                                        vprint((char *)0, node);
                        }
                        simple_lock(&mountlist_slock);
                        next_mnt = mnt->mnt_list.cqe_next;
                        vfs_unbusy(mnt, curp);
                } else {
                        /* Could not busy this mount; move on. */
                        next_mnt = mnt->mnt_list.cqe_next;
                }
                mnt = next_mnt;
        }
        simple_unlock(&mountlist_slock);
}
#endif
1995
1996 /*
1997  * Top level filesystem related information gathering.
1998  */
1999 int
2000 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
2001         int *name;
2002         u_int namelen;
2003         void *oldp;
2004         size_t *oldlenp;
2005         void *newp;
2006         size_t newlen;
2007         struct proc *p;
2008 {
2009         struct ctldebug *cdp;
2010         struct vfsconf *vfsp;
2011
2012         if (name[0] == VFS_NUMMNTOPS) {
2013                 extern unsigned int vfs_nummntops;
2014                 return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
2015         }
2016
2017         /* all sysctl names at this level are at least name and field */
2018         if (namelen < 2)
2019                 return (ENOTDIR);               /* overloaded */
2020         if (name[0] != VFS_GENERIC) {
2021                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2022                         if (vfsp->vfc_typenum == name[0])
2023                                 break;
2024                 if (vfsp == NULL)
2025                         return (EOPNOTSUPP);
2026                 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
2027                     oldp, oldlenp, newp, newlen, p));
2028         }
2029         switch (name[1]) {
2030         case VFS_MAXTYPENUM:
2031                 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
2032         case VFS_CONF:
2033                 if (namelen < 3)
2034                         return (ENOTDIR);       /* overloaded */
2035                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2036                         if (vfsp->vfc_typenum == name[2])
2037                                 break;
2038                 if (vfsp == NULL)
2039                         return (EOPNOTSUPP);
2040                 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
2041                     sizeof(struct vfsconf)));
2042         }
2043         return (EOPNOTSUPP);
2044 }
2045
2046 int kinfo_vdebug = 1;
2047 #define KINFO_VNODESLOP 10
2048 /*
2049  * Dump vnode list (via sysctl).
2050  * Copyout address of vnode followed by vnode.
2051  */
2052 /* ARGSUSED */
2053 int
2054 sysctl_vnode(where, sizep, p)
2055         char *where;
2056         size_t *sizep;
2057         struct proc *p;
2058 {
2059         struct mount *mp, *nmp;
2060         struct vnode *nvp, *vp;
2061         char *bp = where, *savebp;
2062         char *ewhere;
2063         int error;
2064
2065 #define VPTRSZ  sizeof (struct vnode *)
2066 #define VNODESZ sizeof (struct vnode)
2067         if (where == NULL) {
2068                 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2069                 return (0);
2070         }
2071         ewhere = where + *sizep;
2072
2073         simple_lock(&mountlist_slock);
2074         for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2075                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2076                         nmp = mp->mnt_list.cqe_next;
2077                         continue;
2078                 }
2079                 savebp = bp;
2080 again:
2081                 simple_lock(&mntvnode_slock);
2082                 for (vp = mp->mnt_vnodelist.lh_first;
2083                      vp != NULL;
2084                      vp = nvp) {
2085                         /*
2086                          * Check that the vp is still associated with
2087                          * this filesystem.  RACE: could have been
2088                          * recycled onto the same filesystem.
2089                          */
2090                         if (vp->v_mount != mp) {
2091                                 simple_unlock(&mntvnode_slock);
2092                                 if (kinfo_vdebug)
2093                                         printf("kinfo: vp changed\n");
2094                                 bp = savebp;
2095                                 goto again;
2096                         }
2097                         nvp = vp->v_mntvnodes.le_next;
2098                         if (bp + VPTRSZ + VNODESZ > ewhere) {
2099                                 simple_unlock(&mntvnode_slock);
2100                                 *sizep = bp - where;
2101                                 return (ENOMEM);
2102                         }
2103                         simple_unlock(&mntvnode_slock);
2104                         if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2105                            (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2106                                 return (error);
2107                         bp += VPTRSZ + VNODESZ;
2108                         simple_lock(&mntvnode_slock);
2109                 }
2110                 simple_unlock(&mntvnode_slock);
2111                 simple_lock(&mountlist_slock);
2112                 nmp = mp->mnt_list.cqe_next;
2113                 vfs_unbusy(mp, p);
2114         }
2115         simple_unlock(&mountlist_slock);
2116
2117         *sizep = bp - where;
2118         return (0);
2119 }
2120
2121 /*
2122  * Check to see if a filesystem is mounted on a block device.
2123  */
2124 int
2125 vfs_mountedon(vp)
2126         struct vnode *vp;
2127 {
2128         struct vnode *vq;
2129         int error = 0;
2130
2131         if (vp->v_specflags & SI_MOUNTEDON)
2132                 return (EBUSY);
2133         if (vp->v_flag & VALIASED) {
2134                 simple_lock(&spechash_slock);
2135                 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2136                         if (vq->v_rdev != vp->v_rdev ||
2137                             vq->v_type != vp->v_type)
2138                                 continue;
2139                         if (vq->v_specflags & SI_MOUNTEDON) {
2140                                 error = EBUSY;
2141                                 break;
2142                         }
2143                 }
2144                 simple_unlock(&spechash_slock);
2145         }
2146         return (error);
2147 }
2148
2149 /*
2150  * Unmount all filesystems. The list is traversed in reverse order
2151  * of mounting to avoid dependencies.
2152  */
2153 __private_extern__ void
2154 vfs_unmountall()
2155 {
2156         struct mount *mp, *nmp;
2157         struct proc *p = current_proc();
2158
2159         /*
2160          * Since this only runs when rebooting, it is not interlocked.
2161          */
2162         for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2163                 nmp = mp->mnt_list.cqe_prev;
2164                 (void) dounmount(mp, MNT_FORCE, p);
2165         }
2166 }
2167
2168 /*
2169  * Build hash lists of net addresses and hang them off the mount point.
2170  * Called by vfs_export() to set up the lists of export addresses.
2171  */
2172 static int
2173 vfs_hang_addrlist(mp, nep, argp)
2174         struct mount *mp;
2175         struct netexport *nep;
2176         struct export_args *argp;
2177 {
2178         register struct netcred *np;
2179         register struct radix_node_head *rnh;
2180         register int i;
2181         struct radix_node *rn;
2182         struct sockaddr *saddr, *smask = 0;
2183         struct domain *dom;
2184         int error;
2185
2186         if (argp->ex_addrlen == 0) {
2187                 if (mp->mnt_flag & MNT_DEFEXPORTED)
2188                         return (EPERM);
2189                 np = &nep->ne_defexported;
2190                 np->netc_exflags = argp->ex_flags;
2191                 np->netc_anon = argp->ex_anon;
2192                 np->netc_anon.cr_ref = 1;
2193                 mp->mnt_flag |= MNT_DEFEXPORTED;
2194                 return (0);
2195         }
2196         i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2197         MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK);
2198         bzero((caddr_t)np, i);
2199         saddr = (struct sockaddr *)(np + 1);
2200         if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
2201                 goto out;
2202         if (saddr->sa_len > argp->ex_addrlen)
2203                 saddr->sa_len = argp->ex_addrlen;
2204         if (argp->ex_masklen) {
2205                 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2206                 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
2207                 if (error)
2208                         goto out;
2209                 if (smask->sa_len > argp->ex_masklen)
2210                         smask->sa_len = argp->ex_masklen;
2211         }
2212         i = saddr->sa_family;
2213         if ((rnh = nep->ne_rtable[i]) == 0) {
2214                 /*
2215                  * Seems silly to initialize every AF when most are not
2216                  * used, do so on demand here
2217                  */
2218                 for (dom = domains; dom; dom = dom->dom_next)
2219                         if (dom->dom_family == i && dom->dom_rtattach) {
2220                                 dom->dom_rtattach((void **)&nep->ne_rtable[i],
2221                                         dom->dom_rtoffset);
2222                                 break;
2223                         }
2224                 if ((rnh = nep->ne_rtable[i]) == 0) {
2225                         error = ENOBUFS;
2226                         goto out;
2227                 }
2228         }
2229         rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
2230                 np->netc_rnodes);
2231         if (rn == 0) {
2232                 /*
2233                  * One of the reasons that rnh_addaddr may fail is that
2234                  * the entry already exists. To check for this case, we
2235                  * look up the entry to see if it is there. If so, we
2236                  * do not need to make a new entry but do return success.
2237                  */
2238                 _FREE(np, M_NETADDR);
2239                 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
2240                 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
2241                     ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
2242                     !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
2243                             (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
2244                         return (0);
2245                 return (EPERM);
2246         }
2247         np->netc_exflags = argp->ex_flags;
2248         np->netc_anon = argp->ex_anon;
2249         np->netc_anon.cr_ref = 1;
2250         return (0);
2251 out:
2252         _FREE(np, M_NETADDR);
2253         return (error);
2254 }
2255
2256 /* ARGSUSED */
2257 static int
2258 vfs_free_netcred(rn, w)
2259         struct radix_node *rn;
2260         caddr_t w;
2261 {
2262         register struct radix_node_head *rnh = (struct radix_node_head *)w;
2263
2264         (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2265         _FREE((caddr_t)rn, M_NETADDR);
2266         return (0);
2267 }
2268
2269 /*
2270  * Free the net address hash lists that are hanging off the mount points.
2271  */
2272 static void
2273 vfs_free_addrlist(nep)
2274         struct netexport *nep;
2275 {
2276         register int i;
2277         register struct radix_node_head *rnh;
2278
2279         for (i = 0; i <= AF_MAX; i++)
2280                 if (rnh = nep->ne_rtable[i]) {
2281                         (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
2282                             (caddr_t)rnh);
2283                         _FREE((caddr_t)rnh, M_RTABLE);
2284                         nep->ne_rtable[i] = 0;
2285                 }
2286 }
2287
2288 int
2289 vfs_export(mp, nep, argp)
2290         struct mount *mp;
2291         struct netexport *nep;
2292         struct export_args *argp;
2293 {
2294         int error;
2295
2296         if (argp->ex_flags & MNT_DELEXPORT) {
2297                 vfs_free_addrlist(nep);
2298                 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2299         }
2300         if (argp->ex_flags & MNT_EXPORTED) {
2301                 if (error = vfs_hang_addrlist(mp, nep, argp))
2302                         return (error);
2303                 mp->mnt_flag |= MNT_EXPORTED;
2304         }
2305         return (0);
2306 }
2307
2308 struct netcred *
2309 vfs_export_lookup(mp, nep, nam)
2310         register struct mount *mp;
2311         struct netexport *nep;
2312         struct mbuf *nam;
2313 {
2314         register struct netcred *np;
2315         register struct radix_node_head *rnh;
2316         struct sockaddr *saddr;
2317
2318         np = NULL;
2319         if (mp->mnt_flag & MNT_EXPORTED) {
2320                 /*
2321                  * Lookup in the export list first.
2322                  */
2323                 if (nam != NULL) {
2324                         saddr = mtod(nam, struct sockaddr *);
2325                         rnh = nep->ne_rtable[saddr->sa_family];
2326                         if (rnh != NULL) {
2327                                 np = (struct netcred *)
2328                                         (*rnh->rnh_matchaddr)((caddr_t)saddr,
2329                                                               rnh);
2330                                 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2331                                         np = NULL;
2332                         }
2333                 }
2334                 /*
2335                  * If no address match, use the default if it exists.
2336                  */
2337                 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2338                         np = &nep->ne_defexported;
2339         }
2340         return (np);
2341 }
2342
/*
 * vm_object_cache_reclaim: try to reclaim vnodes from the memory
 * object cache.
 *
 * count is handed by reference to vnode_pager_release_from_cache();
 * whatever value that routine leaves behind is returned.
 */
static int
vm_object_cache_reclaim(int count)
{
        void vnode_pager_release_from_cache(int *);
        int remaining = count;

        /* attempt to reclaim vnodes from VM object cache */
        vnode_pager_release_from_cache(&remaining);
        return (remaining);
}
2358
2359 /*
2360  * Release memory object reference held by inactive vnodes
2361  * and then try to reclaim some vnodes from the memory
2362  * object cache
      *
      * count: the inactive-list scan stops once the local counter i
      *        reaches this value; it is also the argument given to
      *        vm_object_cache_reclaim().
      *
      * Returns the value produced by vm_object_cache_reclaim(count).
      * vnode_reclaim_tried and vnode_objects_reclaimed are updated as
      * side effects.
      *
      * The scan restarts from the head of the inactive list every time
      * vnode_free_list_slock has been dropped; loopcnt bounds the number
      * of restarts by inactivevnodes.
      *
      * NOTE(review): the locals cnt, obj and kret do not appear to be
      * referenced anywhere in this function.
2363  */
2364 static int
2365 vnreclaim(int count)
2366 {
2367         int cnt, i, loopcnt;
2368         void *obj;
2369         struct vnode *vp;
2370         int err;
2371         struct proc *p;
2372         kern_return_t kret;
2373
             /*
              * i is incremented when vn_lock() fails and when
              * ubc_destroy_named() returns nonzero; it is not reset
              * across restarts.
              */
2374         i = 0;
2375         loopcnt = 0;
2376
2377         /* Try to release "count" vnodes from the inactive list */
2378 restart:
2379         if (++loopcnt > inactivevnodes) {
2380                 /*
2381                  * I did my best trying to reclaim the vnodes.
2382                  * Do not try any more as that would only lead to
2383                  * long latencies. Also in the worst case
2384                  * this can get totally CPU bound.
2385                  * Just fall through and attempt a reclaim of VM
2386                  * object cache
2387                  */
2388                 goto out;
2389         }
2390
2391         simple_lock(&vnode_free_list_slock);
2392         for (vp = TAILQ_FIRST(&vnode_inactive_list);
2393                         (vp != NULLVP) && (i < count);
2394                         vp = TAILQ_NEXT(vp, v_freelist)) {
2395
                      /* Interlock not available without waiting: skip it. */
2396                 if (!simple_lock_try(&vp->v_interlock))
2397                         continue;
2398
2399                 if (vp->v_usecount != 1)
2400                         panic("vnreclaim: v_usecount");
2401
                      /*
                       * Without UBC info only a VBAD vnode is tolerated
                       * here: it is taken off the inactive list and the
                       * scan moves on.
                       */
2402                 if(!UBCINFOEXISTS(vp)) {
2403                         if (vp->v_type == VBAD) {
2404                                 VREMINACTIVE("vnreclaim", vp);
2405                                 simple_unlock(&vp->v_interlock);
2406                                 continue;
2407                         } else
2408                                 panic("non UBC vnode on inactive list");
2409                                 /* Should not reach here */
2410                 }
2411
2412                 /* If vnode is already being reclaimed, wait */
                      /* Both locks are dropped before sleeping. */
2413                 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
2414                         vp->v_flag |= VXWANT;
2415                         simple_unlock(&vp->v_interlock);
2416                         simple_unlock(&vnode_free_list_slock);
2417                         (void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
2418                         goto restart;
2419                 }
2420
                      /*
                       * Take the vnode off the inactive list and drop the
                       * list lock; from here on every path ends in a
                       * restart of the scan.
                       */
2421                 VREMINACTIVE("vnreclaim", vp);
2422                 simple_unlock(&vnode_free_list_slock);
2423
2424                 if (ubc_issetflags(vp, UI_WASMAPPED)) {
2425                         /*
2426                          * We should not reclaim as it is likely
2427                          * to be in use. Let it die a natural death.
2428                          * Release the UBC reference if one exists
2429                          * and put it back at the tail.
2430                          */
2431                         simple_unlock(&vp->v_interlock);
2432                         if (ubc_release_named(vp)) {
2433                                 if (UBCINFOEXISTS(vp)) {
2434                                         simple_lock(&vp->v_interlock);
2435                                         if (vp->v_usecount == 1 && !VONLIST(vp))
2436                                                 vinactive(vp);
2437                                         simple_unlock(&vp->v_interlock);
2438                                 }
2439                         } else {
2440                             simple_lock(&vp->v_interlock);
2441                                 vinactive(vp);
2442                                 simple_unlock(&vp->v_interlock);
2443                         }
2444                 } else {
2445                         int didhold;
2446
2447                         VORECLAIM_ENABLE(vp);
2448
2449                         /*
2450                          * scrub the dirty pages and invalidate the buffers
2451                          */
2452                         p = current_proc();
2453                         err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
2454                         if (err) {
2455                                 /* cannot reclaim */
2456                                 simple_lock(&vp->v_interlock);
2457                                 vinactive(vp);
2458                                 VORECLAIM_DISABLE(vp);
2459                                 i++;
2460                                 simple_unlock(&vp->v_interlock);
2461                                 goto restart;
2462                         }
2463
2464                         /* keep the vnode alive so we can kill it */
2465                         simple_lock(&vp->v_interlock);
2466                         if(vp->v_usecount != 1)
2467                                 panic("VOCR: usecount race");
2468                         vp->v_usecount++;
2469                         simple_unlock(&vp->v_interlock);
2470
2471                         /* clean up the state in VM without invalidating */
2472                         didhold = ubc_hold(vp);
2473                         if (didhold)
2474                                 (void)ubc_clean(vp, 0);
2475
2476                         /* flush and invalidate buffers associated with the vnode */
2477                         if (vp->v_tag == VT_NFS)
2478                                 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
2479                         else
2480                                 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
2481
2482                         /*
2483                          * Note: for the v_usecount == 2 case, VOP_INACTIVE
2484                          * has not yet been called.  Call it now while vp is
2485                          * still locked, it will also release the lock.
2486                          */
2487                         if (vp->v_usecount == 2)
2488                                 VOP_INACTIVE(vp, p);
2489                         else
2490                                 VOP_UNLOCK(vp, 0, p);
2491
                              /* Balance the ubc_hold() above, if it succeeded. */
2492                         if (didhold)
2493                                 ubc_rele(vp);
2494
2495                         /*
2496                          * destroy the ubc named reference.
2497                          * If we can't because it is held for I/Os
2498                          * in progress, just put it back on the inactive
2499                          * list and move on.  Otherwise, the paging reference
2500                          * is toast (and so is this vnode?).
2501                          */
2502                         if (ubc_destroy_named(vp)) {
2503                             i++;
2504                         }
2505                         simple_lock(&vp->v_interlock);
2506                         VORECLAIM_DISABLE(vp);
2507                         simple_unlock(&vp->v_interlock);
2508                         vrele(vp);  /* release extra use we added here */
2509                 }
2510                 /* inactive list lock was released, must restart */
2511                 goto restart;
2512         }
2513         simple_unlock(&vnode_free_list_slock);
2514
             /*
              * Only reached when the scan above ends without a restart;
              * the loopcnt bail-out jumps straight to "out" and skips
              * this update.
              */
2515         vnode_reclaim_tried += i;
2516 out:
             /* i is reused for the object cache reclaim result. */
2517         i = vm_object_cache_reclaim(count);
2518         vnode_objects_reclaimed += i;
2519
2520         return(i);
2521 }
2522
2523 /*
2524  * This routine is called from vnode_pager_no_senders()
2525  * which in turn can be called with vnode locked by vnode_uncache()
2526  * But it could also get called as a result of vm_object_cache_trim().
2527  * In that case lock state is unknown.
2528  * AGE the vnode so that it gets recycled quickly.
2529  * Check lock status to decide whether to call vput() or vrele().
2530  */
2531 __private_extern__ void
2532 vnode_pager_vrele(struct vnode *vp)
2533 {
2534
2535         boolean_t       funnel_state;
2536         int isvnreclaim = 1;
2537
2538         if (vp == (struct vnode *) NULL)
2539                 panic("vnode_pager_vrele: null vp");
2540
2541         funnel_state = thread_funnel_set(kernel_flock, TRUE);
2542
2543         /* Mark the vnode to be recycled */
2544         vagevp(vp);
2545
2546         simple_lock(&vp->v_interlock);
2547         /*
2548          * If a vgone (or vclean) is already in progress,
2549          * Do not bother with the ubc_info cleanup.
2550          * Let the vclean deal with it.
2551          */
2552         if (vp->v_flag & VXLOCK) {
2553                 CLR(vp->v_flag, VTERMINATE);
2554                 if (ISSET(vp->v_flag, VTERMWANT)) {
2555                         CLR(vp->v_flag, VTERMWANT);
2556                         wakeup((caddr_t)&vp->v_ubcinfo);
2557                 }
2558                 simple_unlock(&vp->v_interlock);
2559                 vrele(vp);
2560                 (void) thread_funnel_set(kernel_flock, funnel_state);
2561                 return;
2562         }
2563
2564         /* It's dead, Jim! */
2565         if (!ISSET(vp->v_flag, VORECLAIM)) {
2566                 /*
2567                  * called as a result of eviction of the memory
2568                  * object from the memory object cache
2569                  */
2570                 isvnreclaim = 0;
2571
2572                 /* So serialize vnode operations */
2573                 VORECLAIM_ENABLE(vp);
2574         }
2575         if (!ISSET(vp->v_flag, VTERMINATE))
2576                 SET(vp->v_flag, VTERMINATE);
2577         if (UBCINFOEXISTS(vp)) {
2578                 struct ubc_info *uip = vp->v_ubcinfo;
2579
2580                 if (ubc_issetflags(vp, UI_WASMAPPED))
2581                         SET(vp->v_flag, VWASMAPPED);
2582
2583                 vp->v_ubcinfo = UBC_NOINFO;  /* catch bad accesses */
2584                 simple_unlock(&vp->v_interlock);
2585                 ubc_info_deallocate(uip);
2586         } else {
2587                 if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
2588                         && ((vp)->v_ubcinfo != UBC_NOINFO)) {
2589                         struct ubc_info *uip = vp->v_ubcinfo;
2590
2591                         vp->v_ubcinfo = UBC_NOINFO;  /* catch bad accesses */
2592                         simple_unlock(&vp->v_interlock);
2593                         ubc_info_deallocate(uip);
2594                 } else {
2595                         simple_unlock(&vp->v_interlock);
2596                 }
2597         }
2598
2599         CLR(vp->v_flag, VTERMINATE);
2600
2601         if (vp->v_type != VBAD){
2602                 vgone(vp);      /* revoke the vnode */
2603                 vrele(vp);      /* and drop the reference */
2604         } else
2605                 vrele(vp);
2606
2607         if (ISSET(vp->v_flag, VTERMWANT)) {
2608                 CLR(vp->v_flag, VTERMWANT);
2609                 wakeup((caddr_t)&vp->v_ubcinfo);
2610         }
2611         if (!isvnreclaim)
2612                 VORECLAIM_DISABLE(vp);
2613         (void) thread_funnel_set(kernel_flock, funnel_state);
2614         return;
2615 }
2616
2617
#if DIAGNOSTIC
/*
 * When non-zero, walk_allvnodes() prints every vnode it finds with a
 * negative use count.
 */
int walk_vnodes_debug=0;

/*
 * Diagnostic sweep over all vnodes.
 *
 * Visits the vnodes of every mount on the mount list, then the vnode free
 * list, then the vnode inactive list.  A vnode whose v_usecount is negative
 * is printed when walk_vnodes_debug is set.  The number of entries on the
 * free list and on the inactive list is always printed.
 *
 * NOTE(review): no list lock is taken here, so the lists must not be
 * changing while this runs -- confirm how callers guarantee that.
 * NOTE(review): the vnode pointer is printed with "%x", which only shows
 * the whole pointer where pointers are no wider than an unsigned int.
 */
void
walk_allvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;
	int cnt;

	/* Vnodes attached to each mounted filesystem. */
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if ((vp->v_usecount < 0) && walk_vnodes_debug)
				printf("vp is %x\n",vp);
		}
		nmp = mp->mnt_list.cqe_next;
	}

	/* Vnodes on the free list. */
	for (cnt = 0, vp = vnode_free_list.tqh_first;
		vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
		if ((vp->v_usecount < 0) && walk_vnodes_debug)
			printf("vp is %x\n",vp);
	}
	printf("%d - free\n", cnt);

	/* Vnodes on the inactive list. */
	for (cnt = 0, vp = vnode_inactive_list.tqh_first;
		vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
		if ((vp->v_usecount < 0) && walk_vnodes_debug)
			printf("vp is %x\n",vp);
	}
	printf("%d - inactive\n", cnt);
}
#endif /* DIAGNOSTIC */
2662
2663 void
2664 vfs_io_attributes(vp, flags, iosize, vectors)
2665         struct vnode    *vp;
2666         int     flags;  /* B_READ or B_WRITE */
2667         int     *iosize;
2668         int     *vectors;
2669 {
2670         struct mount *mp;
2671
2672         /* start with "reasonable" defaults */
2673         *iosize = MAXPHYS;
2674         *vectors = 32;
2675
2676         mp = vp->v_mount;
2677         if (mp != NULL) {
2678                 switch (flags) {
2679                 case B_READ:
2680                         *iosize = mp->mnt_maxreadcnt;
2681                         *vectors = mp->mnt_segreadcnt;
2682                         break;
2683                 case B_WRITE:
2684                         *iosize = mp->mnt_maxwritecnt;
2685                         *vectors = mp->mnt_segwritecnt;
2686                         break;
2687                 default:
2688                         break;
2689                 }
2690         }
2691
2692         return;
2693 }
2694
2695 #include <dev/disk.h>
2696
2697 int
2698 vfs_init_io_attributes(devvp, mp)
2699         struct vnode *devvp;
2700         struct mount *mp;
2701 {
2702         int error;
2703         off_t readblockcnt;
2704         off_t writeblockcnt;
2705         off_t readsegcnt;
2706         off_t writesegcnt;
2707         u_long blksize;
2708
2709         u_int64_t temp;
2710
2711         struct proc *p = current_proc();
2712         struct  ucred *cred = p->p_ucred;
2713
2714         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
2715                                 (caddr_t)&readblockcnt, 0, cred, p)))
2716                 return (error);
2717
2718         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
2719                                 (caddr_t)&writeblockcnt, 0, cred, p)))
2720                 return (error);
2721
2722         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
2723                                 (caddr_t)&readsegcnt, 0, cred, p)))
2724                 return (error);
2725
2726         if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
2727                                 (caddr_t)&writesegcnt, 0, cred, p)))
2728                 return (error);
2729
2730         if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
2731                                 (caddr_t)&blksize, 0, cred, p)))
2732                 return (error);
2733
2734         temp = readblockcnt * blksize;
2735         temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2736         mp->mnt_maxreadcnt = (u_int32_t)temp;
2737
2738         temp = writeblockcnt * blksize;
2739         temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2740         mp->mnt_maxwritecnt = (u_int32_t)temp;
2741
2742         temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
2743         mp->mnt_segreadcnt = (u_int16_t)temp;
2744
2745         temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
2746         mp->mnt_segwritecnt = (u_int16_t)temp;
2747
2748 #if 0
2749         printf("--- IO attributes for mount point 0x%08x ---\n", mp);
2750         printf("\tmnt_maxreadcnt = 0x%x", mp->mnt_maxreadcnt);
2751         printf("\tmnt_maxwritecnt = 0x%x\n", mp->mnt_maxwritecnt);
2752         printf("\tmnt_segreadcnt = 0x%x", mp->mnt_segreadcnt);
2753         printf("\tmnt_segwritecnt = 0x%x\n", mp->mnt_segwritecnt);
2754 #endif /* 0 */
2755
2756         return (error);
2757 }
2758