1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1989, 1993
25 * The Regents of the University of California. All rights reserved.
26 * (c) UNIX System Laboratories, Inc.
27 * All or some portions of this file are derived from material licensed
28 * to the University of California by American Telephone and Telegraph
29 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
30 * the permission of UNIX System Laboratories, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
61 */
62
63 /*
64 * External virtual filesystem routines
65 */
66
67 #define DIAGNOSTIC 1
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/proc.h>
72 #include <sys/mount.h>
73 #include <sys/time.h>
74 #include <sys/vnode.h>
75 #include <sys/stat.h>
76 #include <sys/namei.h>
77 #include <sys/ucred.h>
78 #include <sys/buf.h>
79 #include <sys/errno.h>
80 #include <sys/malloc.h>
81 #include <sys/domain.h>
82 #include <sys/mbuf.h>
83 #include <sys/syslog.h>
84 #include <sys/ubc.h>
85 #include <sys/vm.h>
86 #include <sys/sysctl.h>
87
88 #include <kern/assert.h>
89
90 #include <miscfs/specfs/specdev.h>
91
92 enum vtype iftovt_tab[16] = {
93 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
94 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
95 };
96 int vttoif_tab[9] = {
97 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
98 S_IFSOCK, S_IFIFO, S_IFMT,
99 };
100
101 static void vfree(struct vnode *vp);
102 static void vinactive(struct vnode *vp);
103 extern int vnreclaim(int count);
104 extern kern_return_t
105 adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
106
107 /*
108 * Insq/Remq for the vnode usage lists.
109 */
110 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
111 #define bufremvn(bp) { \
112 LIST_REMOVE(bp, b_vnbufs); \
113 (bp)->b_vnbufs.le_next = NOLIST; \
114 }
115
116 TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
117 TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */
118 struct mntlist mountlist; /* mounted filesystem list */
119
120 #if DIAGNOSTIC
121 #define VLISTCHECK(fun, vp, list) \
122 if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
123 panic("%s: %s vnode not on %slist", (fun), (list), (list));
124
125 #define VINACTIVECHECK(fun, vp, expected) \
126 do { \
127 int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE); \
128 if (__is_inactive ^ expected) \
129 panic("%s: %sinactive vnode, expected %s", (fun), \
130 __is_inactive? "" : "not ", \
131 expected? "inactive": "not inactive"); \
132 } while(0)
133 #else
134 #define VLISTCHECK(fun, vp, list)
135 #define VINACTIVECHECK(fun, vp, expected)
136 #endif /* DIAGNOSTIC */
137
138 #define VLISTNONE(vp) \
139 do { \
140 (vp)->v_freelist.tqe_next = (struct vnode *)0; \
141 (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb; \
142 } while(0)
143
144 #define VONLIST(vp) \
145 ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
146
147 /* remove a vnode from free vnode list */
148 #define VREMFREE(fun, vp) \
149 do { \
150 VLISTCHECK((fun), (vp), "free"); \
151 TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist); \
152 VLISTNONE((vp)); \
153 freevnodes--; \
154 } while(0)
155
156 /* remove a vnode from inactive vnode list */
157 #define VREMINACTIVE(fun, vp) \
158 do { \
159 VLISTCHECK((fun), (vp), "inactive"); \
160 VINACTIVECHECK((fun), (vp), VUINACTIVE); \
161 TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \
162 CLR((vp)->v_flag, VUINACTIVE); \
163 VLISTNONE((vp)); \
164 inactivevnodes--; \
165 } while(0)
166
167 #define VORECLAIM_ENABLE(vp) \
168 do { \
169 if (ISSET((vp)->v_flag, VORECLAIM)) \
170                         panic("vm object reclaim already"); \
171 SET((vp)->v_flag, VORECLAIM); \
172 } while(0)
173
174 #define VORECLAIM_DISABLE(vp) \
175 do { \
176 CLR((vp)->v_flag, VORECLAIM); \
177 if (ISSET((vp)->v_flag, VXWANT)) { \
178 CLR((vp)->v_flag, VXWANT); \
179 wakeup((caddr_t)(vp)); \
180 } \
181 } while(0)
182
183 /*
184 * Have to declare first two locks as actual data even if !MACH_SLOCKS, since
185  * pointers to them get passed around.
186 */
187 simple_lock_data_t mountlist_slock;
188 simple_lock_data_t mntvnode_slock;
189 decl_simple_lock_data(,mntid_slock);
190 decl_simple_lock_data(,vnode_free_list_slock);
191 decl_simple_lock_data(,spechash_slock);
192
193 /*
194  * vnodetarget is the number of vnodes we expect to get back
195  * from the inactive vnode list and VM object cache.
196  * As vnreclaim() is a mainly CPU-bound operation, this number
197  * could be higher for faster processors.
198 * Having this number too high introduces longer delays in
199 * the execution of getnewvnode().
200 */
201 unsigned long vnodetarget; /* target for vnreclaim() */
202 #define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */
203
204 /*
205 * We need quite a few vnodes on the free list to sustain the
206  * rapid stat() calls the compilation process makes, and still benefit from the name
207 * cache. Having too few vnodes on the free list causes serious disk
208 * thrashing as we cycle through them.
209 */
210 #define VNODE_FREE_MIN      100     /* freelist should have at least this many */
211
212 /*
213  * We need to get vnodes back from the VM object cache when a certain number
214 * of vnodes are reused from the freelist. This is essential for the
215 * caching to be effective in the namecache and the buffer cache [for the
216 * metadata].
217 */
218 #define VNODE_TOOMANY_REUSED (VNODE_FREE_MIN/4)
219
220 /*
221 * If we have enough vnodes on the freelist we do not want to reclaim
222 * the vnodes from the VM object cache.
223 */
224 #define VNODE_FREE_ENOUGH (VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
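
/*
 * A comment-only sketch of how getnewvnode() applies the thresholds
 * above; the authoritative logic is in getnewvnode() below:
 *
 *	if (numvnodes < desiredvnodes && freevnodes < 2 * VNODE_FREE_MIN)
 *		allocate a brand new vnode
 *	else {
 *		if (freevnodes < VNODE_FREE_MIN)
 *			vnreclaim(vnodetarget);
 *		if (numvnodes >= desiredvnodes &&
 *		    reused > VNODE_TOOMANY_REUSED &&
 *		    freevnodes < VNODE_FREE_ENOUGH)
 *			vnreclaim(vnodetarget);
 *		reuse the first unlocked vnode on the free list
 *	}
 */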
225
226 /*
227 * Initialize the vnode management data structures.
228 */
229 void
230 vntblinit()
231 {
232 extern struct lock__bsd__ exchangelock;
233
234 simple_lock_init(&mountlist_slock);
235 simple_lock_init(&mntvnode_slock);
236 simple_lock_init(&mntid_slock);
237 simple_lock_init(&spechash_slock);
238 TAILQ_INIT(&vnode_free_list);
239 simple_lock_init(&vnode_free_list_slock);
240 TAILQ_INIT(&vnode_inactive_list);
241 CIRCLEQ_INIT(&mountlist);
242 lockinit(&exchangelock, PVFS, "exchange", 0, 0);
243
244 if (!vnodetarget)
245 vnodetarget = VNODE_FREE_TARGET;
246
247 /*
248      * Scale the vm_object_cache to accommodate the vnodes
249 * we want to cache
250 */
251 (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
252 }
253
254 /* Reset the VM Object Cache with the values passed in */
255 kern_return_t
256 reset_vmobjectcache(unsigned int val1, unsigned int val2)
257 {
258 vm_size_t oval = val1 - VNODE_FREE_MIN;
259 vm_size_t nval = val2 - VNODE_FREE_MIN;
260
261 return(adjust_vm_object_cache(oval, nval));
262 }
263
264 /*
265 * Mark a mount point as busy. Used to synchronize access and to delay
266 * unmounting. Interlock is not released on failure.
267 */
268 int
269 vfs_busy(mp, flags, interlkp, p)
270 struct mount *mp;
271 int flags;
272 struct slock *interlkp;
273 struct proc *p;
274 {
275 int lkflags;
276
277 if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
278 if (flags & LK_NOWAIT)
279 return (ENOENT);
280 mp->mnt_kern_flag |= MNTK_MWAIT;
281 if (interlkp)
282 simple_unlock(interlkp);
283 /*
284 * Since all busy locks are shared except the exclusive
285 * lock granted when unmounting, the only place that a
286 * wakeup needs to be done is at the release of the
287 * exclusive lock at the end of dounmount.
288 */
289 sleep((caddr_t)mp, PVFS);
290 if (interlkp)
291 simple_lock(interlkp);
292 return (ENOENT);
293 }
294 lkflags = LK_SHARED;
295 if (interlkp)
296 lkflags |= LK_INTERLOCK;
297 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
298 panic("vfs_busy: unexpected lock failure");
299 return (0);
300 }
301
302 /*
303 * Free a busy filesystem.
304 */
305 void
306 vfs_unbusy(mp, p)
307 struct mount *mp;
308 struct proc *p;
309 {
310
311 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
312 }
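
/*
 * Typical usage when iterating the mount list, as in printlockedvnodes()
 * and sysctl_vnode() below; the mountlist interlock is handed to
 * vfs_busy() so it can be dropped if we must sleep on a busy mount:
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 *			nmp = mp->mnt_list.cqe_next;
 *			continue;
 *		}
 *		... operate on mp ...
 *		simple_lock(&mountlist_slock);
 *		nmp = mp->mnt_list.cqe_next;
 *		vfs_unbusy(mp, p);
 *	}
 *	simple_unlock(&mountlist_slock);
 */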
313
314 /*
315 * Lookup a filesystem type, and if found allocate and initialize
316 * a mount structure for it.
317 *
318 * Devname is usually updated by mount(8) after booting.
319 */
320 int
321 vfs_rootmountalloc(fstypename, devname, mpp)
322 char *fstypename;
323 char *devname;
324 struct mount **mpp;
325 {
326 struct proc *p = current_proc(); /* XXX */
327 struct vfsconf *vfsp;
328 struct mount *mp;
329
330 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
331 if (!strcmp(vfsp->vfc_name, fstypename))
332 break;
333 if (vfsp == NULL)
334 return (ENODEV);
335 mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
336 bzero((char *)mp, (u_long)sizeof(struct mount));
337 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
338 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
339 LIST_INIT(&mp->mnt_vnodelist);
340 mp->mnt_vfc = vfsp;
341 mp->mnt_op = vfsp->vfc_vfsops;
342 mp->mnt_flag = MNT_RDONLY;
343 mp->mnt_vnodecovered = NULLVP;
344 vfsp->vfc_refcount++;
345 mp->mnt_stat.f_type = vfsp->vfc_typenum;
346 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
347 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
348 mp->mnt_stat.f_mntonname[0] = '/';
349 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
350 *mpp = mp;
351 return (0);
352 }
353
354 /*
355 * Find an appropriate filesystem to use for the root. If a filesystem
356 * has not been preselected, walk through the list of known filesystems
357 * trying those that have mountroot routines, and try them until one
358 * works or we have tried them all.
359 */
360 int
361 vfs_mountroot()
362 {
363 struct vfsconf *vfsp;
364 extern int (*mountroot)(void);
365 int error;
366
367 if (mountroot != NULL) {
368 error = (*mountroot)();
369 return (error);
370 }
371
372 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
373 if (vfsp->vfc_mountroot == NULL)
374 continue;
375 if ((error = (*vfsp->vfc_mountroot)()) == 0)
376 return (0);
377 if (error != EINVAL)
378 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
379 }
380 return (ENODEV);
381 }
382
383 /*
384 * Lookup a mount point by filesystem identifier.
385 */
386 struct mount *
387 vfs_getvfs(fsid)
388 fsid_t *fsid;
389 {
390 register struct mount *mp;
391
392 simple_lock(&mountlist_slock);
393 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
394 mp = mp->mnt_list.cqe_next) {
395 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
396 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
397 simple_unlock(&mountlist_slock);
398 return (mp);
399 }
400 }
401 simple_unlock(&mountlist_slock);
402 return ((struct mount *)0);
403 }
404
405 /*
406 * Get a new unique fsid
407 */
408 void
409 vfs_getnewfsid(mp)
410 struct mount *mp;
411 {
412 static u_short xxxfs_mntid;
413
414 fsid_t tfsid;
415 int mtype;
416
417 simple_lock(&mntid_slock);
418 mtype = mp->mnt_vfc->vfc_typenum;
419 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
420 mp->mnt_stat.f_fsid.val[1] = mtype;
421 if (xxxfs_mntid == 0)
422 ++xxxfs_mntid;
423 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
424 tfsid.val[1] = mtype;
425 if (mountlist.cqh_first != (void *)&mountlist) {
426 while (vfs_getvfs(&tfsid)) {
427 tfsid.val[0]++;
428 xxxfs_mntid++;
429 }
430 }
431 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
432 simple_unlock(&mntid_slock);
433 }
434
435 /*
436 * Set vnode attributes to VNOVAL
437 */
438 void
439 vattr_null(vap)
440 register struct vattr *vap;
441 {
442
443 vap->va_type = VNON;
444 vap->va_size = vap->va_bytes = VNOVAL;
445 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
446 vap->va_fsid = vap->va_fileid =
447 vap->va_blocksize = vap->va_rdev =
448 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
449 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
450 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
451 vap->va_flags = vap->va_gen = VNOVAL;
452 vap->va_vaflags = 0;
453 }
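
/*
 * Illustrative use: a caller resets the attributes and then fills in
 * only the fields it intends to change, leaving the rest at VNOVAL so
 * the filesystem ignores them (cred/p here are the caller's credentials
 * and process):
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */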
454
455 /*
456 * Routines having to do with the management of the vnode table.
457 */
458 extern int (**dead_vnodeop_p)(void *);
459 static void vclean __P((struct vnode *vp, int flag, struct proc *p));
460 extern void vgonel __P((struct vnode *vp, struct proc *p));
461 long numvnodes, freevnodes;
462 long inactivevnodes;
463 long vnode_reclaim_tried;
464 long vnode_objects_reclaimed;
465
466
467 extern struct vattr va_null;
468
469 /*
470 * Return the next vnode from the free list.
471 */
472 int
473 getnewvnode(tag, mp, vops, vpp)
474 enum vtagtype tag;
475 struct mount *mp;
476 int (**vops)(void *);
477 struct vnode **vpp;
478 {
479 struct proc *p = current_proc(); /* XXX */
480 struct vnode *vp;
481 int cnt, didretry = 0;
482 static int reused = 0; /* track the reuse rate */
483 int reclaimhits = 0;
484
485 retry:
486 simple_lock(&vnode_free_list_slock);
487 /*
488      * MALLOC a vnode if the number of vnodes has not reached the desired
489      * value and the number on the free list is still reasonable.
490      * Otherwise reuse one from the freelist, even though we may evict a
491      * name cache entry, to reduce the number of vnodes that accumulate;
492      * vnodes tie up wired memory and are never garbage collected.
493 */
494 if (numvnodes < desiredvnodes && (freevnodes < (2 * VNODE_FREE_MIN))) {
495 numvnodes++;
496 simple_unlock(&vnode_free_list_slock);
497 MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
498 bzero((char *)vp, sizeof *vp);
499 VLISTNONE(vp); /* avoid double queue removal */
500 simple_lock_init(&vp->v_interlock);
501 goto done;
502 }
503
504 /*
505      * Once the desired number of vnodes has been allocated,
506      * we start reusing them.
507 */
508 if (freevnodes < VNODE_FREE_MIN) {
509 /*
510 * if we are low on vnodes on the freelist attempt to get
511 * some back from the inactive list and VM object cache
512 */
513 simple_unlock(&vnode_free_list_slock);
514 (void)vnreclaim(vnodetarget);
515 simple_lock(&vnode_free_list_slock);
516 }
517 if (numvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED) {
518 reused = 0;
519 if (freevnodes < VNODE_FREE_ENOUGH) {
520 simple_unlock(&vnode_free_list_slock);
521 (void)vnreclaim(vnodetarget);
522 simple_lock(&vnode_free_list_slock);
523 }
524 }
525
526 for (cnt = 0, vp = vnode_free_list.tqh_first;
527 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
528 if (simple_lock_try(&vp->v_interlock)) {
529 /* got the interlock */
530 if (ISSET(vp->v_flag, VORECLAIM)) {
531 /* skip over the vnodes that are being reclaimed */
532 simple_unlock(&vp->v_interlock);
533 reclaimhits++;
534 } else
535 break;
536 }
537 }
538
539 /*
540 * Unless this is a bad time of the month, at most
541 * the first NCPUS items on the free list are
542 * locked, so this is close enough to being empty.
543 */
544 if (vp == NULLVP) {
545 simple_unlock(&vnode_free_list_slock);
546 if (!(didretry++) && (vnreclaim(vnodetarget) > 0))
547 goto retry;
548 tablefull("vnode");
549         log(LOG_EMERG, "%d vnodes locked, %d desired, %ld numvnodes, "
550             "%ld free, %ld inactive, %d being reclaimed\n",
551 cnt, desiredvnodes, numvnodes, freevnodes, inactivevnodes,
552 reclaimhits);
553 *vpp = 0;
554 return (ENFILE);
555 }
556
557 if (vp->v_usecount)
558 panic("free vnode isn't: v_type = %d, v_usecount = %d?",
559 vp->v_type, vp->v_usecount);
560
561 VREMFREE("getnewvnode", vp);
562 reused++;
563 simple_unlock(&vnode_free_list_slock);
564 vp->v_lease = NULL;
565 cache_purge(vp);
566 if (vp->v_type != VBAD)
567 vgonel(vp, p); /* clean and reclaim the vnode */
568 else
569 simple_unlock(&vp->v_interlock);
570 #if DIAGNOSTIC
571 if (vp->v_data)
572 panic("cleaned vnode isn't");
573 {
574 int s = splbio();
575 if (vp->v_numoutput)
576 panic("Clean vnode has pending I/O's");
577 splx(s);
578 }
579 #endif
580 if (UBCINFOEXISTS(vp))
581 panic("getnewvnode: ubcinfo not cleaned");
582 else
583 vp->v_ubcinfo = 0;
584
585 vp->v_lastr = -1;
586 vp->v_ralen = 0;
587 vp->v_maxra = 0;
588 vp->v_lastw = 0;
589 vp->v_ciosiz = 0;
590 vp->v_cstart = 0;
591 vp->v_clen = 0;
592 vp->v_socket = 0;
593
594 done:
595 vp->v_flag = VSTANDARD;
596 vp->v_type = VNON;
597 vp->v_tag = tag;
598 vp->v_op = vops;
599 insmntque(vp, mp);
600 *vpp = vp;
601 vp->v_usecount = 1;
602 vp->v_data = 0;
603 return (0);
604 }
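
/*
 * A typical caller, as in bdevvp() below, allocates a fresh vnode and
 * then fills in the type-specific fields (illustrative):
 *
 *	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
 *	if (error)
 *		return (error);
 *	nvp->v_type = VBLK;
 */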
605
606 /*
607 * Move a vnode from one mount queue to another.
608 */
609 void
610 insmntque(vp, mp)
611 struct vnode *vp;
612 struct mount *mp;
613 {
614
615 simple_lock(&mntvnode_slock);
616 /*
617 * Delete from old mount point vnode list, if on one.
618 */
619 if (vp->v_mount != NULL)
620 LIST_REMOVE(vp, v_mntvnodes);
621 /*
622 * Insert into list of vnodes for the new mount point, if available.
623 */
624 if ((vp->v_mount = mp) != NULL)
625 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
626 simple_unlock(&mntvnode_slock);
627 }
628
629 /*
630 * Update outstanding I/O count and do wakeup if requested.
631 */
632 void
633 vwakeup(bp)
634 register struct buf *bp;
635 {
636 register struct vnode *vp;
637
638 CLR(bp->b_flags, B_WRITEINPROG);
639 if (vp = bp->b_vp) {
640 if (--vp->v_numoutput < 0)
641 panic("vwakeup: neg numoutput");
642 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
643 if (vp->v_numoutput < 0)
644 panic("vwakeup: neg numoutput 2");
645 vp->v_flag &= ~VBWAIT;
646 wakeup((caddr_t)&vp->v_numoutput);
647 }
648 }
649 }
650
651 /*
652 * Flush out and invalidate all buffers associated with a vnode.
653 * Called with the underlying object locked.
654 */
655 int
656 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
657 register struct vnode *vp;
658 int flags;
659 struct ucred *cred;
660 struct proc *p;
661 int slpflag, slptimeo;
662 {
663 register struct buf *bp;
664 struct buf *nbp, *blist;
665 int s, error = 0;
666
667 if (flags & V_SAVE) {
668 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
669 return (error);
670 }
671 if (vp->v_dirtyblkhd.lh_first != NULL || (vp->v_flag & VHASDIRTY))
672 panic("vinvalbuf: dirty bufs");
673 }
674
675 for (;;) {
676 if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
677 while (blist && blist->b_lblkno < 0)
678 blist = blist->b_vnbufs.le_next;
679 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
680 (flags & V_SAVEMETA))
681 while (blist && blist->b_lblkno < 0)
682 blist = blist->b_vnbufs.le_next;
683 if (!blist)
684 break;
685
686 for (bp = blist; bp; bp = nbp) {
687 nbp = bp->b_vnbufs.le_next;
688 if (flags & V_SAVEMETA && bp->b_lblkno < 0)
689 continue;
690 s = splbio();
691 if (ISSET(bp->b_flags, B_BUSY)) {
692 SET(bp->b_flags, B_WANTED);
693 error = tsleep((caddr_t)bp,
694 slpflag | (PRIBIO + 1), "vinvalbuf",
695 slptimeo);
696 splx(s);
697 if (error) {
698 return (error);
699 }
700 break;
701 }
702 bremfree(bp);
703 SET(bp->b_flags, B_BUSY);
704 splx(s);
705 /*
706 * XXX Since there are no node locks for NFS, I believe
707 * there is a slight chance that a delayed write will
708 * occur while sleeping just above, so check for it.
709 */
710 if (ISSET(bp->b_flags, B_DELWRI) && (flags & V_SAVE)) {
711 (void) VOP_BWRITE(bp);
712 break;
713 }
714 SET(bp->b_flags, B_INVAL);
715 brelse(bp);
716 }
717 }
718 if (!(flags & V_SAVEMETA) &&
719 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
720 panic("vinvalbuf: flush failed");
721 return (0);
722 }
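
/*
 * Illustrative calls, cf. vclean() below: V_SAVE writes back dirty
 * buffers (via VOP_FSYNC) before invalidating; omitting it discards
 * them.  V_SAVEMETA additionally preserves the indirect-block buffers
 * (those with b_lblkno < 0):
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
 *	error = vinvalbuf(vp, 0, cred, p, slpflag, slptimeo);
 */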
723
724 /*
725 * Associate a buffer with a vnode.
726 */
727 void
728 bgetvp(vp, bp)
729 register struct vnode *vp;
730 register struct buf *bp;
731 {
732
733 if (bp->b_vp)
734 panic("bgetvp: not free");
735 VHOLD(vp);
736 bp->b_vp = vp;
737 if (vp->v_type == VBLK || vp->v_type == VCHR)
738 bp->b_dev = vp->v_rdev;
739 else
740 bp->b_dev = NODEV;
741 /*
742 * Insert onto list for new vnode.
743 */
744 bufinsvn(bp, &vp->v_cleanblkhd);
745 }
746
747 /*
748 * Disassociate a buffer from a vnode.
749 */
750 void
751 brelvp(bp)
752 register struct buf *bp;
753 {
754 struct vnode *vp;
755
756 if (bp->b_vp == (struct vnode *) 0)
757 panic("brelvp: NULL");
758 /*
759 * Delete from old vnode list, if on one.
760 */
761 if (bp->b_vnbufs.le_next != NOLIST)
762 bufremvn(bp);
763 vp = bp->b_vp;
764 bp->b_vp = (struct vnode *) 0;
765 HOLDRELE(vp);
766 }
767
768 /*
769 * Reassign a buffer from one vnode to another.
770 * Used to assign file specific control information
771 * (indirect blocks) to the vnode to which they belong.
772 */
773 void
774 reassignbuf(bp, newvp)
775 register struct buf *bp;
776 register struct vnode *newvp;
777 {
778 register struct buflists *listheadp;
779
780 if (newvp == NULL) {
781 printf("reassignbuf: NULL");
782 return;
783 }
784 /*
785 * Delete from old vnode list, if on one.
786 */
787 if (bp->b_vnbufs.le_next != NOLIST)
788 bufremvn(bp);
789 /*
790 * If dirty, put on list of dirty buffers;
791 * otherwise insert onto list of clean buffers.
792 */
793 if (ISSET(bp->b_flags, B_DELWRI))
794 listheadp = &newvp->v_dirtyblkhd;
795 else
796 listheadp = &newvp->v_cleanblkhd;
797 bufinsvn(bp, listheadp);
798 }
799
800 /*
801 * Create a vnode for a block device.
802 * Used for root filesystem, argdev, and swap areas.
803 * Also used for memory file system special devices.
804 */
805 int
806 bdevvp(dev, vpp)
807 dev_t dev;
808 struct vnode **vpp;
809 {
810 register struct vnode *vp;
811 struct vnode *nvp;
812 int error;
813
814 if (dev == NODEV) {
815 *vpp = NULLVP;
816 return (ENODEV);
817 }
818 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
819 if (error) {
820 *vpp = NULLVP;
821 return (error);
822 }
823 vp = nvp;
824 vp->v_type = VBLK;
825 if (nvp = checkalias(vp, dev, (struct mount *)0)) {
826 vput(vp);
827 vp = nvp;
828 }
829 *vpp = vp;
830 return (0);
831 }
832
833 /*
834 * Check to see if the new vnode represents a special device
835 * for which we already have a vnode (either because of
836 * bdevvp() or because of a different vnode representing
837 * the same block device). If such an alias exists, deallocate
838 * the existing contents and return the aliased vnode. The
839 * caller is responsible for filling it with its new contents.
840 */
841 struct vnode *
842 checkalias(nvp, nvp_rdev, mp)
843 register struct vnode *nvp;
844 dev_t nvp_rdev;
845 struct mount *mp;
846 {
847 struct proc *p = current_proc(); /* XXX */
848 struct vnode *vp;
849 struct vnode **vpp;
850 struct specinfo * bufhold;
851 int buffree = 1;
852
853 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
854 return (NULLVP);
855
856 bufhold = (struct specinfo *)_MALLOC_ZONE(sizeof(struct specinfo),
857 M_VNODE, M_WAITOK);
858 vpp = &speclisth[SPECHASH(nvp_rdev)];
859 loop:
860 simple_lock(&spechash_slock);
861 for (vp = *vpp; vp; vp = vp->v_specnext) {
862 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
863 continue;
864 /*
865 * Alias, but not in use, so flush it out.
866 */
867 simple_lock(&vp->v_interlock);
868 if (vp->v_usecount == 0) {
869 simple_unlock(&spechash_slock);
870 vgonel(vp, p);
871 goto loop;
872 }
873 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
874 simple_unlock(&spechash_slock);
875 goto loop;
876 }
877 break;
878 }
879 if (vp == NULL || vp->v_tag != VT_NON) {
880 nvp->v_specinfo = bufhold;
881 buffree = 0; /* buffer used */
882 bzero(nvp->v_specinfo, sizeof(struct specinfo));
883 nvp->v_rdev = nvp_rdev;
884 nvp->v_hashchain = vpp;
885 nvp->v_specnext = *vpp;
886 nvp->v_specflags = 0;
887 simple_unlock(&spechash_slock);
888 *vpp = nvp;
889 if (vp != NULLVP) {
890 nvp->v_flag |= VALIASED;
891 vp->v_flag |= VALIASED;
892 vput(vp);
893 }
894 /* Since buffer is used just return */
895 return (NULLVP);
896 }
897 simple_unlock(&spechash_slock);
898 VOP_UNLOCK(vp, 0, p);
899 simple_lock(&vp->v_interlock);
900 vclean(vp, 0, p);
901 vp->v_op = nvp->v_op;
902 vp->v_tag = nvp->v_tag;
903 nvp->v_type = VNON;
904 insmntque(vp, mp);
905 if (buffree)
906 _FREE_ZONE((void *)bufhold, sizeof (struct specinfo), M_VNODE);
907 return (vp);
908 }
909
910 /*
911 * Grab a particular vnode from the free list, increment its
912  * reference count and lock it. The vnode lock bit is set if the
913 * vnode is being eliminated in vgone. The process is awakened
914 * when the transition is completed, and an error returned to
915 * indicate that the vnode is no longer usable (possibly having
916 * been changed to a new file system type).
917 */
918 int
919 vget(vp, flags, p)
920 struct vnode *vp;
921 int flags;
922 struct proc *p;
923 {
924 int error = 0;
925
926 /*
927 * If the vnode is in the process of being cleaned out for
928 * another use, we wait for the cleaning to finish and then
929 * return failure. Cleaning is determined by checking that
930 * the VXLOCK flag is set.
931 */
932 if ((flags & LK_INTERLOCK) == 0)
933 simple_lock(&vp->v_interlock);
934 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
935 vp->v_flag |= VXWANT;
936 simple_unlock(&vp->v_interlock);
937 tsleep((caddr_t)vp, PINOD, "vget", 0);
938 return (ENOENT);
939 }
940
941 /*
942 * vnode is being terminated.
943 * wait for vnode_pager_no_senders() to clear VTERMINATE
944 */
945 if (ISSET(vp->v_flag, VTERMINATE)) {
946 SET(vp->v_flag, VTERMWANT);
947 simple_unlock(&vp->v_interlock);
948 tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
949 return (ENOENT);
950 }
951
952 simple_lock(&vnode_free_list_slock);
953 /* If on the free list, remove it from there */
954 if (vp->v_usecount == 0) {
955 if (VONLIST(vp))
956 VREMFREE("vget", vp);
957 } else {
958 /* If on the inactive list, remove it from there */
959 if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
960 if (VONLIST(vp))
961 VREMINACTIVE("vget", vp);
962 }
963 }
964
965 /* The vnode should not be on the inactive list here */
966 VINACTIVECHECK("vget", vp, 0);
967
968 simple_unlock(&vnode_free_list_slock);
969 if (++vp->v_usecount <= 0)
970 panic("vget: v_usecount");
971
972 if (flags & LK_TYPE_MASK) {
973 if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) {
974 /*
975                          * If the vnode was not active in the first place, we
976                          * must not call vrele(), as VOP_INACTIVE() is not
977                          * required.
978                          * So the relevant part of vrele() is inlined here.
979 */
980 simple_lock(&vp->v_interlock);
981 if (--vp->v_usecount == 1) {
982 if (UBCINFOEXISTS(vp)) {
983 vinactive(vp);
984 simple_unlock(&vp->v_interlock);
985 return (error);
986 }
987 }
988 if (vp->v_usecount > 0) {
989 simple_unlock(&vp->v_interlock);
990 return (error);
991 }
992 if (vp->v_usecount < 0)
993 panic("vget: negative usecount (%d)", vp->v_usecount);
994 vfree(vp);
995 simple_unlock(&vp->v_interlock);
996 }
997 return (error);
998 }
999
1000 /*
1001 * If this is a valid UBC vnode, if usecount is 1 and if
1002 * this vnode was mapped in the past, it is likely
1003  * that the ubc_info was freed due to the memory object getting recycled.
1004 * Just re-initialize the ubc_info.
1005 */
1006 if ((vp->v_usecount == 1) && UBCISVALID(vp)) {
1007 if (UBCINFOMISSING(vp))
1008 panic("vget: lost ubc_info");
1009
1010 if (ISSET(vp->v_flag, VTERMINATE)) {
1011 /*
1012 * vnode is being terminated.
1013 * wait for vnode_pager_no_senders() to clear
1014 * VTERMINATE
1015 */
1016 SET(vp->v_flag, VTERMWANT);
1017 simple_unlock(&vp->v_interlock);
1018 tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
1019 /* return error */
1020 return (ENOENT);
1021 }
1022
1023 if ((!UBCINFOEXISTS(vp)) && ISSET(vp->v_flag, VWASMAPPED)) {
1024 simple_unlock(&vp->v_interlock);
1025 ubc_info_init(vp);
1026 simple_lock(&vp->v_interlock);
1027 } else
1028 panic("vget: stolen ubc_info");
1029
1030 if (!ubc_issetflags(vp, UI_HASOBJREF))
1031 if (ubc_getobject(vp, (UBC_NOREACTIVATE|UBC_HOLDOBJECT)))
1032 panic("vget: null object");
1033 }
1034 out:
1035 if ((flags & LK_INTERLOCK) == 0)
1036 simple_unlock(&vp->v_interlock);
1037 return (0);
1038 }
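
/*
 * Illustrative pattern: take a reference with the vnode locked, use
 * the vnode, then drop the lock and the reference together.  vget()
 * fails with ENOENT if the vnode is being reclaimed out from under us:
 *
 *	if (vget(vp, LK_EXCLUSIVE, p))
 *		return (ENOENT);
 *	... use vp ...
 *	vput(vp);
 */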
1039
1040 /*
1041 * Stubs to use when there is no locking to be done on the underlying object.
1042 * A minimal shared lock is necessary to ensure that the underlying object
1043 * is not revoked while an operation is in progress. So, an active shared
1044  * count is maintained in an auxiliary vnode lock structure.
1045 */
1046 int
1047 vop_nolock(ap)
1048 struct vop_lock_args /* {
1049 struct vnode *a_vp;
1050 int a_flags;
1051 struct proc *a_p;
1052 } */ *ap;
1053 {
1054 #ifdef notyet
1055 /*
1056 * This code cannot be used until all the non-locking filesystems
1057 * (notably NFS) are converted to properly lock and release nodes.
1058 * Also, certain vnode operations change the locking state within
1059 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
1060 * and symlink). Ideally these operations should not change the
1061 * lock state, but should be changed to let the caller of the
1062 * function unlock them. Otherwise all intermediate vnode layers
1063 * (such as union, umapfs, etc) must catch these functions to do
1064 * the necessary locking at their layer. Note that the inactive
1065 * and lookup operations also change their lock state, but this
1066 * cannot be avoided, so these two operations will always need
1067 * to be handled in intermediate layers.
1068 */
1069 struct vnode *vp = ap->a_vp;
1070 int vnflags, flags = ap->a_flags;
1071
1072 if (vp->v_vnlock == NULL) {
1073 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1074 return (0);
1075 MALLOC_ZONE(vp->v_vnlock, struct lock__bsd__ *,
1076 sizeof(struct lock__bsd__), M_VNODE, M_WAITOK);
1077 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1078 }
1079 switch (flags & LK_TYPE_MASK) {
1080 case LK_DRAIN:
1081 vnflags = LK_DRAIN;
1082 break;
1083 case LK_EXCLUSIVE:
1084 case LK_SHARED:
1085 vnflags = LK_SHARED;
1086 break;
1087 case LK_UPGRADE:
1088 case LK_EXCLUPGRADE:
1089 case LK_DOWNGRADE:
1090 return (0);
1091 case LK_RELEASE:
1092 default:
1093 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
1094 }
1095 if (flags & LK_INTERLOCK)
1096 vnflags |= LK_INTERLOCK;
1097 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
1098 #else /* for now */
1099 /*
1100 * Since we are not using the lock manager, we must clear
1101 * the interlock here.
1102 */
1103 if (ap->a_flags & LK_INTERLOCK)
1104 simple_unlock(&ap->a_vp->v_interlock);
1105 return (0);
1106 #endif
1107 }
1108
1109 /*
1110 * Decrement the active use count.
1111 */
1112 int
1113 vop_nounlock(ap)
1114 struct vop_unlock_args /* {
1115 struct vnode *a_vp;
1116 int a_flags;
1117 struct proc *a_p;
1118 } */ *ap;
1119 {
1120 struct vnode *vp = ap->a_vp;
1121
1122 if (vp->v_vnlock == NULL)
1123 return (0);
1124 return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
1125 }
1126
1127 /*
1128 * Return whether or not the node is in use.
1129 */
1130 int
1131 vop_noislocked(ap)
1132 struct vop_islocked_args /* {
1133 struct vnode *a_vp;
1134 } */ *ap;
1135 {
1136 struct vnode *vp = ap->a_vp;
1137
1138 if (vp->v_vnlock == NULL)
1139 return (0);
1140 return (lockstatus(vp->v_vnlock));
1141 }
1142
1143 /*
1144 * Vnode reference.
1145 */
1146 void
1147 vref(vp)
1148 struct vnode *vp;
1149 {
1150
1151 simple_lock(&vp->v_interlock);
1152 if (vp->v_usecount <= 0)
1153 panic("vref used where vget required");
1154
1155 /* If on the inactive list, remove it from there */
1156 if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
1157 if (VONLIST(vp)) {
1158 simple_lock(&vnode_free_list_slock);
1159 VREMINACTIVE("vref", vp);
1160 simple_unlock(&vnode_free_list_slock);
1161 }
1162 }
1163 /* The vnode should not be on the inactive list here */
1164 VINACTIVECHECK("vref", vp, 0);
1165
1166 if (++vp->v_usecount <= 0)
1167 panic("vref v_usecount");
1168 simple_unlock(&vp->v_interlock);
1169 }
1170
1171 /*
1172  * put the vnode on the appropriate free list.
1173 * called with v_interlock held.
1174 */
1175 static void
1176 vfree(vp)
1177 struct vnode *vp;
1178 {
1179 /*
1180      * if the vnode was not obtained by calling getnewvnode() we
1181 * are not responsible for the cleanup. Just return.
1182 */
1183 if (!(vp->v_flag & VSTANDARD)) {
1184 return;
1185 }
1186
1187 if (vp->v_usecount != 0)
1188 panic("vfree: v_usecount");
1189
1190 /* insert at tail of LRU list or at head if VAGE is set */
1191 simple_lock(&vnode_free_list_slock);
1192
1193 if (VONLIST(vp))
1194 panic("vfree: vnode still on list");
1195
1196 if (vp->v_flag & VAGE) {
1197 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1198 vp->v_flag &= ~VAGE;
1199 } else
1200 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1201 freevnodes++;
1202 simple_unlock(&vnode_free_list_slock);
1203 return;
1204 }
1205
1206 /*
1207 * put the vnode on the inactive list.
1208 * called with v_interlock held
1209 */
1210 static void
1211 vinactive(vp)
1212 struct vnode *vp;
1213 {
1214 if (!UBCINFOEXISTS(vp))
1215 panic("vinactive: not a UBC vnode");
1216
1217 if (vp->v_usecount != 1)
1218 panic("vinactive: v_usecount");
1219
1220 simple_lock(&vnode_free_list_slock);
1221
1222 if (VONLIST(vp))
1223 panic("vinactive: vnode still on list");
1224 VINACTIVECHECK("vinactive", vp, 0);
1225
1226 TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
1227 SET(vp->v_flag, VUINACTIVE);
1228 CLR(vp->v_flag, (VNOCACHE_DATA | VRAOFF));
1229
1230 inactivevnodes++;
1231 simple_unlock(&vnode_free_list_slock);
1232 return;
1233 }
1234
1235
1236 /*
1237 * vput(), just unlock and vrele()
1238 */
1239 void
1240 vput(vp)
1241 struct vnode *vp;
1242 {
1243 struct proc *p = current_proc(); /* XXX */
1244
1245 #if DIAGNOSTIC
1246 if (vp == NULL)
1247 panic("vput: null vp");
1248 #endif
1249 simple_lock(&vp->v_interlock);
1250 if (--vp->v_usecount == 1) {
1251 if (UBCINFOEXISTS(vp)) {
1252 vinactive(vp);
1253 simple_unlock(&vp->v_interlock);
1254 VOP_UNLOCK(vp, 0, p);
1255 return;
1256 }
1257 }
1258 if (vp->v_usecount > 0) {
1259 simple_unlock(&vp->v_interlock);
1260 VOP_UNLOCK(vp, 0, p);
1261 return;
1262 }
1263 #if DIAGNOSTIC
1264 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1265 vprint("vput: bad ref count", vp);
1266 panic("vput: v_usecount = %d, v_writecount = %d",
1267 vp->v_usecount, vp->v_writecount);
1268 }
1269 #endif
1270 if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1271         VREMINACTIVE("vput", vp);
1272
1273 simple_unlock(&vp->v_interlock);
1274 VOP_INACTIVE(vp, p);
1275 /*
1276      * The interlock is not held and
1277      * VOP_INACTIVE releases the vnode lock.
1278      * We could block and the vnode might get reactivated,
1279      * so we cannot just call vfree() without checking the state.
1280 */
1281 simple_lock(&vp->v_interlock);
1282 if (!VONLIST(vp)) {
1283 if (vp->v_usecount == 0)
1284 vfree(vp);
1285 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1286 vinactive(vp);
1287 }
1288 simple_unlock(&vp->v_interlock);
1289 }
1290
1291 /*
1292 * Vnode release.
1293 * If count drops to zero, call inactive routine and return to freelist.
1294 */
1295 void
1296 vrele(vp)
1297 struct vnode *vp;
1298 {
1299 struct proc *p = current_proc(); /* XXX */
1300
1301 #if DIAGNOSTIC
1302 if (vp == NULL)
1303 panic("vrele: null vp");
1304 #endif
1305 simple_lock(&vp->v_interlock);
1306 if (--vp->v_usecount == 1) {
1307 if (UBCINFOEXISTS(vp)) {
1308 vinactive(vp);
1309 simple_unlock(&vp->v_interlock);
1310 return;
1311 }
1312 }
1313 if (vp->v_usecount > 0) {
1314 simple_unlock(&vp->v_interlock);
1315 return;
1316 }
1317 #if DIAGNOSTIC
1318 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1319 vprint("vrele: bad ref count", vp);
1320 panic("vrele: ref cnt");
1321 }
1322 #endif
1323 if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1324 VREMINACTIVE("vrele", vp);
1325
1326
1327 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
1328 /* vnode is being cleaned, just return */
1329 vfree(vp);
1330 simple_unlock(&vp->v_interlock);
1331 return;
1332 }
1333
1334 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1335 VOP_INACTIVE(vp, p);
1336 /*
1337          * vn_lock releases the interlock and
1338          * VOP_INACTIVE releases the vnode lock.
1339          * We could block and the vnode might get reactivated,
1340          * so we cannot just call vfree() without checking the state.
1341 */
1342 simple_lock(&vp->v_interlock);
1343 if (!VONLIST(vp)) {
1344 if (vp->v_usecount == 0)
1345 vfree(vp);
1346 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1347 vinactive(vp);
1348 }
1349 simple_unlock(&vp->v_interlock);
1350 }
1351 #if 0
1352 else {
1353 vfree(vp);
1354 simple_unlock(&vp->v_interlock);
1355 kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp);
1356 }
1357 #endif
1358 }
1359
1360 void
1361 vagevp(vp)
1362 struct vnode *vp;
1363 {
1364 assert(vp);
1365 simple_lock(&vp->v_interlock);
1366 vp->v_flag |= VAGE;
1367 simple_unlock(&vp->v_interlock);
1368 return;
1369 }
1370
1371 /*
1372 * Page or buffer structure gets a reference.
1373 */
1374 void
1375 vhold(vp)
1376 register struct vnode *vp;
1377 {
1378
1379 simple_lock(&vp->v_interlock);
1380 vp->v_holdcnt++;
1381 simple_unlock(&vp->v_interlock);
1382 }
1383
1384 /*
1385 * Page or buffer structure frees a reference.
1386 */
1387 void
1388 holdrele(vp)
1389 register struct vnode *vp;
1390 {
1391
1392 simple_lock(&vp->v_interlock);
1393 if (vp->v_holdcnt <= 0)
1394 panic("holdrele: holdcnt");
1395 vp->v_holdcnt--;
1396 simple_unlock(&vp->v_interlock);
1397 }
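
/*
 * The buffer cache takes these references via the VHOLD()/HOLDRELE()
 * macros; cf. bgetvp()/brelvp() above (illustrative):
 *
 *	VHOLD(vp);
 *	bp->b_vp = vp;
 *	...
 *	bp->b_vp = (struct vnode *)0;
 *	HOLDRELE(vp);
 */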
1398
1399 /*
1400 * Remove any vnodes in the vnode table belonging to mount point mp.
1401 *
1402 * If MNT_NOFORCE is specified, there should not be any active ones,
1403 * return error if any are found (nb: this is a user error, not a
1404 * system error). If MNT_FORCE is specified, detach any active vnodes
1405 * that are found.
1406 */
1407 #if DIAGNOSTIC
1408 int busyprt = 0; /* print out busy vnodes */
1409 #if 0
1410 struct ctldebug debug1 = { "busyprt", &busyprt };
1411 #endif /* 0 */
1412 #endif
1413
1414 int
1415 vflush(mp, skipvp, flags)
1416 struct mount *mp;
1417 struct vnode *skipvp;
1418 int flags;
1419 {
1420 struct proc *p = current_proc(); /* XXX */
1421 struct vnode *vp, *nvp;
1422 int busy = 0;
1423
1424 simple_lock(&mntvnode_slock);
1425 loop:
1426 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1427 if (vp->v_mount != mp)
1428 goto loop;
1429 nvp = vp->v_mntvnodes.le_next;
1430 /*
1431 * Skip over a selected vnode.
1432 */
1433 if (vp == skipvp)
1434 continue;
1435
1436 simple_lock(&vp->v_interlock);
1437 /*
1438 * Skip over a vnodes marked VSYSTEM.
1439 */
1440 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1441 simple_unlock(&vp->v_interlock);
1442 continue;
1443 }
1444 /*
1445          * Skip over vnodes marked VSWAP.
1446 */
1447 if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
1448 simple_unlock(&vp->v_interlock);
1449 continue;
1450 }
1451 /*
1452 * If WRITECLOSE is set, only flush out regular file
1453 * vnodes open for writing.
1454 */
1455 if ((flags & WRITECLOSE) &&
1456 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1457 simple_unlock(&vp->v_interlock);
1458 continue;
1459 }
1460 /*
1461 * With v_usecount == 0, all we need to do is clear
1462 * out the vnode data structures and we are done.
1463 */
1464 if (vp->v_usecount == 0) {
1465 simple_unlock(&mntvnode_slock);
1466 vgonel(vp, p);
1467 simple_lock(&mntvnode_slock);
1468 continue;
1469 }
1470 /*
1471 * If FORCECLOSE is set, forcibly close the vnode.
1472 * For block or character devices, revert to an
1473 * anonymous device. For all other files, just kill them.
1474 */
1475 if (flags & FORCECLOSE) {
1476 simple_unlock(&mntvnode_slock);
1477 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1478 vgonel(vp, p);
1479 } else {
1480 vclean(vp, 0, p);
1481 vp->v_op = spec_vnodeop_p;
1482 insmntque(vp, (struct mount *)0);
1483 }
1484 simple_lock(&mntvnode_slock);
1485 continue;
1486 }
1487 #if DIAGNOSTIC
1488 if (busyprt)
1489 vprint("vflush: busy vnode", vp);
1490 #endif
1491 simple_unlock(&vp->v_interlock);
1492 busy++;
1493 }
1494 simple_unlock(&mntvnode_slock);
1495 if (busy)
1496 return (EBUSY);
1497 return (0);
1498 }
1499
1500 /*
1501 * Disassociate the underlying file system from a vnode.
1502 * The vnode interlock is held on entry.
1503 */
1504 static void
1505 vclean(vp, flags, p)
1506 struct vnode *vp;
1507 int flags;
1508 struct proc *p;
1509 {
1510 int active;
1511 void *obj;
1512 int removed = 0;
1513
1514 /*
1515      * if the vnode was not obtained by calling getnewvnode() we
1516 * are not responsible for the cleanup. Just return.
1517 */
1518 if (!(vp->v_flag & VSTANDARD)) {
1519 simple_unlock(&vp->v_interlock);
1520 return;
1521 }
1522
1523 /*
1524 * Check to see if the vnode is in use.
1525 * If so we have to reference it before we clean it out
1526 * so that its count cannot fall to zero and generate a
1527 * race against ourselves to recycle it.
1528 */
1529 if (active = vp->v_usecount)
1530 if (++vp->v_usecount <= 0)
1531 panic("vclean: v_usecount");
1532 /*
1533 * Prevent the vnode from being recycled or
1534 * brought into use while we clean it out.
1535 */
1536 if (vp->v_flag & VXLOCK)
1537 panic("vclean: deadlock");
1538 vp->v_flag |= VXLOCK;
1539
1540 /*
1541 * Even if the count is zero, the VOP_INACTIVE routine may still
1542 * have the object locked while it cleans it out. The VOP_LOCK
1543 * ensures that the VOP_INACTIVE routine is done with its work.
1544 * For active vnodes, it ensures that no other activity can
1545 * occur while the underlying object is being cleaned out.
1546 */
1547 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1548
1549 /*
1550 * if this vnode is on the inactive list
1551 * take it off the list.
1552 */
1553 if ((active == 1) &&
1554 (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))) {
1555 simple_lock(&vnode_free_list_slock);
1556 VREMINACTIVE("vclean", vp);
1557 simple_unlock(&vnode_free_list_slock);
1558 removed++;
1559 }
1560
1561 /* Clean the pages in VM. */
1562 if ((active) && UBCINFOEXISTS(vp)) {
1563 (void)ubc_clean(vp, 0); /* do not invalidate */
1564 }
1565
1566 /*
1567 * Clean out any buffers associated with the vnode.
1568 */
1569 if (flags & DOCLOSE) {
1570 if (vp->v_tag == VT_NFS)
1571 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
1572 else
1573 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1574 }
1575 /*
1576 * If purging an active vnode, it must be closed and
1577 * deactivated before being reclaimed. Note that the
1578 * VOP_INACTIVE will unlock the vnode.
1579 */
1580 if (active) {
1581 if (flags & DOCLOSE)
1582 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1583 VOP_INACTIVE(vp, p);
1584 } else {
1585 /*
1586 * Any other processes trying to obtain this lock must first
1587 * wait for VXLOCK to clear, then call the new lock operation.
1588 */
1589 VOP_UNLOCK(vp, 0, p);
1590 }
1591 /*
1592 * Reclaim the vnode.
1593 */
1594 if (VOP_RECLAIM(vp, p))
1595 panic("vclean: cannot reclaim");
1596 if (active)
1597 vrele(vp);
1598 cache_purge(vp);
1599 if (vp->v_vnlock) {
1600 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1601 vprint("vclean: lock not drained", vp);
1602 FREE_ZONE(vp->v_vnlock, sizeof (struct lock__bsd__), M_VNODE);
1603 vp->v_vnlock = NULL;
1604 }
1605
1606 /* It's dead, Jim! */
1607 vp->v_op = dead_vnodeop_p;
1608 vp->v_tag = VT_NON;
1609
1610 /*
1611      * v_data is reclaimed by VOP_RECLAIM; all vnode
1612      * operations generated by the code below will be directed
1613      * to the deadfs.
1614 */
1615 if (UBCINFOEXISTS(vp)) {
1616 /* vnode is dying, destroy the object */
1617 if (ubc_issetflags(vp, UI_HASOBJREF)) {
1618 obj = ubc_getobject(vp, UBC_NOREACTIVATE);
1619 if (obj == NULL)
1620 panic("vclean: null object");
1621 if (ISSET(vp->v_flag, VTERMINATE))
1622             panic("vclean: already terminating");
1623 SET(vp->v_flag, VTERMINATE);
1624
1625 ubc_clearflags(vp, UI_HASOBJREF);
1626 memory_object_destroy(obj, 0);
1627
1628 /*
1629 * memory_object_destroy() is asynchronous with respect
1630 * to vnode_pager_no_senders().
1631 * wait for vnode_pager_no_senders() to clear
1632 * VTERMINATE
1633 */
1634 while (ISSET(vp->v_flag, VTERMINATE)) {
1635 SET(vp->v_flag, VTERMWANT);
1636 tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
1637 }
1638 if (UBCINFOEXISTS(vp)) {
1639 ubc_info_free(vp);
1640 vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
1641 }
1642 }
1643 }
1644
1645 /*
1646 * Done with purge, notify sleepers of the grim news.
1647 */
1648 vp->v_flag &= ~VXLOCK;
1649 if (vp->v_flag & VXWANT) {
1650 vp->v_flag &= ~VXWANT;
1651 wakeup((caddr_t)vp);
1652 }
1653 }
1654
1655 /*
1656 * Eliminate all activity associated with the requested vnode
1657 * and with all vnodes aliased to the requested vnode.
1658 */
1659 int
1660 vop_revoke(ap)
1661 struct vop_revoke_args /* {
1662 struct vnode *a_vp;
1663 int a_flags;
1664 } */ *ap;
1665 {
1666 struct vnode *vp, *vq;
1667 struct proc *p = current_proc(); /* XXX */
1668
1669 #if DIAGNOSTIC
1670 if ((ap->a_flags & REVOKEALL) == 0)
1671 panic("vop_revoke");
1672 #endif
1673
1674 vp = ap->a_vp;
1675 simple_lock(&vp->v_interlock);
1676
1677 if (vp->v_flag & VALIASED) {
1678 /*
1679 * If a vgone (or vclean) is already in progress,
1680 * wait until it is done and return.
1681 */
1682 if (vp->v_flag & VXLOCK) {
1683 while (vp->v_flag & VXLOCK) {
1684 vp->v_flag |= VXWANT;
1685 simple_unlock(&vp->v_interlock);
1686 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1687 }
1688 return (0);
1689 }
1690 /*
1691 * Ensure that vp will not be vgone'd while we
1692 * are eliminating its aliases.
1693 */
1694 vp->v_flag |= VXLOCK;
1695 simple_unlock(&vp->v_interlock);
1696 while (vp->v_flag & VALIASED) {
1697 simple_lock(&spechash_slock);
1698 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1699 if (vq->v_rdev != vp->v_rdev ||
1700 vq->v_type != vp->v_type || vp == vq)
1701 continue;
1702 simple_unlock(&spechash_slock);
1703 vgone(vq);
1704 break;
1705 }
1706 if (vq == NULLVP)
1707 simple_unlock(&spechash_slock);
1708 }
1709 /*
1710 * Remove the lock so that vgone below will
1711 * really eliminate the vnode after which time
1712 * vgone will awaken any sleepers.
1713 */
1714 simple_lock(&vp->v_interlock);
1715 vp->v_flag &= ~VXLOCK;
1716 }
1717 vgonel(vp, p);
1718 return (0);
1719 }
1720
1721 /*
1722 * Recycle an unused vnode to the front of the free list.
1723 * Release the passed interlock if the vnode will be recycled.
1724 */
1725 int
1726 vrecycle(vp, inter_lkp, p)
1727 struct vnode *vp;
1728 struct slock *inter_lkp;
1729 struct proc *p;
1730 {
1731
1732 simple_lock(&vp->v_interlock);
1733 if (vp->v_usecount == 0) {
1734 if (inter_lkp)
1735 simple_unlock(inter_lkp);
1736 vgonel(vp, p);
1737 return (1);
1738 }
1739 simple_unlock(&vp->v_interlock);
1740 return (0);
1741 }
1742
1743 /*
1744 * Eliminate all activity associated with a vnode
1745 * in preparation for reuse.
1746 */
1747 void
1748 vgone(vp)
1749 struct vnode *vp;
1750 {
1751 struct proc *p = current_proc(); /* XXX */
1752
1753 simple_lock(&vp->v_interlock);
1754 vgonel(vp, p);
1755 }
1756
1757 /*
1758 * vgone, with the vp interlock held.
1759 */
1760 void
1761 vgonel(vp, p)
1762 struct vnode *vp;
1763 struct proc *p;
1764 {
1765 struct vnode *vq;
1766 struct vnode *vx;
1767
1768 /*
1769      * if the vnode was not obtained by calling getnewvnode() we
1770 * are not responsible for the cleanup. Just return.
1771 */
1772 if (!(vp->v_flag & VSTANDARD)) {
1773 simple_unlock(&vp->v_interlock);
1774 return;
1775 }
1776
1777 /*
1778 * If a vgone (or vclean) is already in progress,
1779 * wait until it is done and return.
1780 */
1781 if (vp->v_flag & VXLOCK) {
1782 while (vp->v_flag & VXLOCK) {
1783 vp->v_flag |= VXWANT;
1784 simple_unlock(&vp->v_interlock);
1785 tsleep((caddr_t)vp, PINOD, "vgone", 0);
1786 }
1787 return;
1788 }
1789 /*
1790 * Clean out the filesystem specific data.
1791 */
1792 vclean(vp, DOCLOSE, p);
1793 /*
1794 * Delete from old mount point vnode list, if on one.
1795 */
1796 if (vp->v_mount != NULL)
1797 insmntque(vp, (struct mount *)0);
1798 /*
1799 * If special device, remove it from special device alias list
1800 * if it is on one.
1801 */
1802 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1803 simple_lock(&spechash_slock);
1804 if (*vp->v_hashchain == vp) {
1805 *vp->v_hashchain = vp->v_specnext;
1806 } else {
1807 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1808 if (vq->v_specnext != vp)
1809 continue;
1810 vq->v_specnext = vp->v_specnext;
1811 break;
1812 }
1813 if (vq == NULL)
1814 panic("missing bdev");
1815 }
1816 if (vp->v_flag & VALIASED) {
1817 vx = NULL;
1818 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1819 if (vq->v_rdev != vp->v_rdev ||
1820 vq->v_type != vp->v_type)
1821 continue;
1822 if (vx)
1823 break;
1824 vx = vq;
1825 }
1826 if (vx == NULL)
1827 panic("missing alias");
1828 if (vq == NULL)
1829 vx->v_flag &= ~VALIASED;
1830 vp->v_flag &= ~VALIASED;
1831 }
1832 simple_unlock(&spechash_slock);
1833 FREE_ZONE(vp->v_specinfo, sizeof (struct specinfo), M_VNODE);
1834 vp->v_specinfo = NULL;
1835 }
1836 /*
1837 * If it is on the freelist and not already at the head,
1838 * move it to the head of the list. The test of the back
1839 * pointer and the reference count of zero is because
1840 * it will be removed from the free list by getnewvnode,
1841 * but will not have its reference count incremented until
1842 * after calling vgone. If the reference count were
1843 * incremented first, vgone would (incorrectly) try to
1844 * close the previous instance of the underlying object.
1845 * So, the back pointer is explicitly set to `0xdeadb' in
1846 * getnewvnode after removing it from the freelist to ensure
1847 * that we do not try to move it here.
1848 */
1849 if (vp->v_usecount == 0) {
1850 simple_lock(&vnode_free_list_slock);
1851 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1852 vnode_free_list.tqh_first != vp) {
1853 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1854 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1855 }
1856 simple_unlock(&vnode_free_list_slock);
1857 }
1858 vp->v_type = VBAD;
1859 }
1860
1861 /*
1862 * Lookup a vnode by device number.
1863 */
1864 int
1865 vfinddev(dev, type, vpp)
1866 dev_t dev;
1867 enum vtype type;
1868 struct vnode **vpp;
1869 {
1870 struct vnode *vp;
1871 int rc = 0;
1872
1873 simple_lock(&spechash_slock);
1874 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1875 if (dev != vp->v_rdev || type != vp->v_type)
1876 continue;
1877 *vpp = vp;
1878 rc = 1;
1879 break;
1880 }
1881 simple_unlock(&spechash_slock);
1882 return (rc);
1883 }
1884
1885 /*
1886 * Calculate the total number of references to a special device.
1887 */
1888 int
1889 vcount(vp)
1890 struct vnode *vp;
1891 {
1892 struct vnode *vq, *vnext;
1893 int count;
1894
1895 loop:
1896 if ((vp->v_flag & VALIASED) == 0)
1897 return (vp->v_usecount);
1898 simple_lock(&spechash_slock);
1899 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1900 vnext = vq->v_specnext;
1901 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1902 continue;
1903 /*
1904 * Alias, but not in use, so flush it out.
1905 */
1906 if (vq->v_usecount == 0 && vq != vp) {
1907 simple_unlock(&spechash_slock);
1908 vgone(vq);
1909 goto loop;
1910 }
1911 count += vq->v_usecount;
1912 }
1913 simple_unlock(&spechash_slock);
1914 return (count);
1915 }
1916
1917 int prtactive = 0; /* 1 => print out reclaim of active vnodes */
1918
1919 /*
1920 * Print out a description of a vnode.
1921 */
1922 static char *typename[] =
1923 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1924
1925 void
1926 vprint(label, vp)
1927 char *label;
1928 register struct vnode *vp;
1929 {
1930 char buf[64];
1931
1932 if (label != NULL)
1933 printf("%s: ", label);
1934 printf("type %s, usecount %d, writecount %d, refcount %d,",
1935 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1936 vp->v_holdcnt);
1937 buf[0] = '\0';
1938 if (vp->v_flag & VROOT)
1939 strcat(buf, "|VROOT");
1940 if (vp->v_flag & VTEXT)
1941 strcat(buf, "|VTEXT");
1942 if (vp->v_flag & VSYSTEM)
1943 strcat(buf, "|VSYSTEM");
1944 if (vp->v_flag & VXLOCK)
1945 strcat(buf, "|VXLOCK");
1946 if (vp->v_flag & VXWANT)
1947 strcat(buf, "|VXWANT");
1948 if (vp->v_flag & VBWAIT)
1949 strcat(buf, "|VBWAIT");
1950 if (vp->v_flag & VALIASED)
1951 strcat(buf, "|VALIASED");
1952 if (buf[0] != '\0')
1953 printf(" flags (%s)", &buf[1]);
1954 if (vp->v_data == NULL) {
1955 printf("\n");
1956 } else {
1957 printf("\n\t");
1958 VOP_PRINT(vp);
1959 }
1960 }
1961
1962 #ifdef DEBUG
1963 /*
1964 * List all of the locked vnodes in the system.
1965 * Called when debugging the kernel.
1966 */
1967 void
1968 printlockedvnodes()
1969 {
1970 struct proc *p = current_proc(); /* XXX */
1971 struct mount *mp, *nmp;
1972 struct vnode *vp;
1973
1974 printf("Locked vnodes\n");
1975 simple_lock(&mountlist_slock);
1976 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1977 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1978 nmp = mp->mnt_list.cqe_next;
1979 continue;
1980 }
1981 for (vp = mp->mnt_vnodelist.lh_first;
1982 vp != NULL;
1983 vp = vp->v_mntvnodes.le_next) {
1984 if (VOP_ISLOCKED(vp))
1985 vprint((char *)0, vp);
1986 }
1987 simple_lock(&mountlist_slock);
1988 nmp = mp->mnt_list.cqe_next;
1989 vfs_unbusy(mp, p);
1990 }
1991 simple_unlock(&mountlist_slock);
1992 }
1993 #endif
1994
1995 /*
1996 * Top level filesystem related information gathering.
1997 */
1998 int
1999 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
2000 int *name;
2001 u_int namelen;
2002 void *oldp;
2003 size_t *oldlenp;
2004 void *newp;
2005 size_t newlen;
2006 struct proc *p;
2007 {
2008 struct ctldebug *cdp;
2009 struct vfsconf *vfsp;
2010
2011 if (name[0] == VFS_NUMMNTOPS) {
2012 extern unsigned int vfs_nummntops;
2013 return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
2014 }
2015
2016 /* all sysctl names at this level are at least name and field */
2017 if (namelen < 2)
2018 return (ENOTDIR); /* overloaded */
2019 if (name[0] != VFS_GENERIC) {
2020 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2021 if (vfsp->vfc_typenum == name[0])
2022 break;
2023 if (vfsp == NULL)
2024 return (EOPNOTSUPP);
2025 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
2026 oldp, oldlenp, newp, newlen, p));
2027 }
2028 switch (name[1]) {
2029 case VFS_MAXTYPENUM:
2030 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
2031 case VFS_CONF:
2032 if (namelen < 3)
2033 return (ENOTDIR); /* overloaded */
2034 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2035 if (vfsp->vfc_typenum == name[2])
2036 break;
2037 if (vfsp == NULL)
2038 return (EOPNOTSUPP);
2039 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
2040 sizeof(struct vfsconf)));
2041 }
2042 return (EOPNOTSUPP);
2043 }
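
/*
 * Name vectors handled here (illustrative):
 *
 *	{ VFS_NUMMNTOPS }			number of mount operations
 *	{ <fs typenum>, ... }			forwarded to that fs's vfs_sysctl
 *	{ VFS_GENERIC, VFS_MAXTYPENUM }		returns maxvfsconf
 *	{ VFS_GENERIC, VFS_CONF, <typenum> }	returns that vfsconf entry
 */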
2044
2045 int kinfo_vdebug = 1;
2046 #define KINFO_VNODESLOP 10
2047 /*
2048 * Dump vnode list (via sysctl).
2049 * Copyout address of vnode followed by vnode.
2050 */
2051 /* ARGSUSED */
2052 int
2053 sysctl_vnode(where, sizep, p)
2054 char *where;
2055 size_t *sizep;
2056 struct proc *p;
2057 {
2058 struct mount *mp, *nmp;
2059 struct vnode *nvp, *vp;
2060 char *bp = where, *savebp;
2061 char *ewhere;
2062 int error;
2063
2064 #define VPTRSZ sizeof (struct vnode *)
2065 #define VNODESZ sizeof (struct vnode)
2066 if (where == NULL) {
2067 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2068 return (0);
2069 }
2070 ewhere = where + *sizep;
2071
2072 simple_lock(&mountlist_slock);
2073 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2074 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2075 nmp = mp->mnt_list.cqe_next;
2076 continue;
2077 }
2078 savebp = bp;
2079 again:
2080 simple_lock(&mntvnode_slock);
2081 for (vp = mp->mnt_vnodelist.lh_first;
2082 vp != NULL;
2083 vp = nvp) {
2084 /*
2085 * Check that the vp is still associated with
2086 * this filesystem. RACE: could have been
2087 * recycled onto the same filesystem.
2088 */
2089 if (vp->v_mount != mp) {
2090 simple_unlock(&mntvnode_slock);
2091 if (kinfo_vdebug)
2092 printf("kinfo: vp changed\n");
2093 bp = savebp;
2094 goto again;
2095 }
2096 nvp = vp->v_mntvnodes.le_next;
2097 if (bp + VPTRSZ + VNODESZ > ewhere) {
2098 simple_unlock(&mntvnode_slock);
2099 *sizep = bp - where;
2100 return (ENOMEM);
2101 }
2102 simple_unlock(&mntvnode_slock);
2103 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2104 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2105 return (error);
2106 bp += VPTRSZ + VNODESZ;
2107 simple_lock(&mntvnode_slock);
2108 }
2109 simple_unlock(&mntvnode_slock);
2110 simple_lock(&mountlist_slock);
2111 nmp = mp->mnt_list.cqe_next;
2112 vfs_unbusy(mp, p);
2113 }
2114 simple_unlock(&mountlist_slock);
2115
2116 *sizep = bp - where;
2117 return (0);
2118 }
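/*
 * Example (illustrative sketch): the NULL-buffer convention above is
 * the usual two-pass sysctl(3) idiom; this handler sits under
 * CTL_KERN/KERN_VNODE, so from user space:
 *
 *	#include <sys/param.h>
 *	#include <sys/sysctl.h>
 *	#include <stdlib.h>
 *
 *	int mib[2] = { CTL_KERN, KERN_VNODE };
 *	size_t len;
 *	char *buf;
 *
 *	if (sysctl(mib, 2, NULL, &len, NULL, 0) == 0 &&
 *	    (buf = malloc(len)) != NULL)
 *		(void)sysctl(mib, 2, buf, &len, NULL, 0);
 *
 * On success buf holds (vnode pointer, struct vnode) pairs; an
 * ENOMEM failure means the vnode list grew past the KINFO_VNODESLOP
 * slop, and the caller should retry with a larger buffer.
 */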
2119
2120 /*
2121 * Check to see if a filesystem is mounted on a block device.
2122 */
2123 int
2124 vfs_mountedon(vp)
2125 struct vnode *vp;
2126 {
2127 struct vnode *vq;
2128 int error = 0;
2129
2130 if (vp->v_specflags & SI_MOUNTEDON)
2131 return (EBUSY);
2132 if (vp->v_flag & VALIASED) {
2133 simple_lock(&spechash_slock);
2134 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2135 if (vq->v_rdev != vp->v_rdev ||
2136 vq->v_type != vp->v_type)
2137 continue;
2138 if (vq->v_specflags & SI_MOUNTEDON) {
2139 error = EBUSY;
2140 break;
2141 }
2142 }
2143 simple_unlock(&spechash_slock);
2144 }
2145 return (error);
2146 }
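/*
 * Example (illustrative sketch): mount code typically uses this to
 * reject a block device that already carries a filesystem before
 * proceeding, along the lines of:
 *
 *	if ((error = vfs_mountedon(devvp)) != 0)
 *		return (error);
 *
 * where devvp is the device vnode; EBUSY comes back if the device,
 * or one of its aliases, is already mounted on.
 */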
2147
2148 /*
2149 * Unmount all filesystems. The list is traversed in reverse order
2150 * of mounting to avoid dependencies.
2151 */
2152 void
2153 vfs_unmountall()
2154 {
2155 struct mount *mp, *nmp;
2156 struct proc *p = current_proc(); /* XXX */
2157
2158 /*
2159 * Since this only runs when rebooting, it is not interlocked.
2160 */
2161 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2162 nmp = mp->mnt_list.cqe_prev;
2163 (void) dounmount(mp, MNT_FORCE, p);
2164 }
2165 }
2166
2167 /*
2168 * Build hash lists of net addresses and hang them off the mount point.
2169 * Called by ufs_mount() to set up the lists of export addresses.
2170 */
2171 static int
2172 vfs_hang_addrlist(mp, nep, argp)
2173 struct mount *mp;
2174 struct netexport *nep;
2175 struct export_args *argp;
2176 {
2177 register struct netcred *np;
2178 register struct radix_node_head *rnh;
2179 register int i;
2180 struct radix_node *rn;
2181 struct sockaddr *saddr, *smask = 0;
2182 struct domain *dom;
2183 int error;
2184
2185 if (argp->ex_addrlen == 0) {
2186 if (mp->mnt_flag & MNT_DEFEXPORTED)
2187 return (EPERM);
2188 np = &nep->ne_defexported;
2189 np->netc_exflags = argp->ex_flags;
2190 np->netc_anon = argp->ex_anon;
2191 np->netc_anon.cr_ref = 1;
2192 mp->mnt_flag |= MNT_DEFEXPORTED;
2193 return (0);
2194 }
2195 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2196 MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK);
2197 bzero((caddr_t)np, i);
2198 saddr = (struct sockaddr *)(np + 1);
2199 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
2200 goto out;
2201 if (saddr->sa_len > argp->ex_addrlen)
2202 saddr->sa_len = argp->ex_addrlen;
2203 if (argp->ex_masklen) {
2204 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2205 error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
2206 if (error)
2207 goto out;
2208 if (smask->sa_len > argp->ex_masklen)
2209 smask->sa_len = argp->ex_masklen;
2210 }
2211 i = saddr->sa_family;
2212 if ((rnh = nep->ne_rtable[i]) == 0) {
2213 /*
2214 * It seems silly to initialize every AF when most are
2215 * not used; do so on demand here.
2216 */
2217 for (dom = domains; dom; dom = dom->dom_next)
2218 if (dom->dom_family == i && dom->dom_rtattach) {
2219 dom->dom_rtattach((void **)&nep->ne_rtable[i],
2220 dom->dom_rtoffset);
2221 break;
2222 }
2223 if ((rnh = nep->ne_rtable[i]) == 0) {
2224 error = ENOBUFS;
2225 goto out;
2226 }
2227 }
2228 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
2229 np->netc_rnodes);
2230 if (rn == 0) {
2231 /*
2232 * One of the reasons that rnh_addaddr may fail is that
2233 * the entry already exists. To check for this case, we
2234 * look up the entry to see if it is there. If so, we
2235 * do not need to make a new entry but do return success.
2236 */
2237 _FREE(np, M_NETADDR);
2238 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
2239 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
2240 ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
2241 !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
2242 (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
2243 return (0);
2244 return (EPERM);
2245 }
2246 np->netc_exflags = argp->ex_flags;
2247 np->netc_anon = argp->ex_anon;
2248 np->netc_anon.cr_ref = 1;
2249 return (0);
2250 out:
2251 _FREE(np, M_NETADDR);
2252 return (error);
2253 }
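/*
 * Example (illustrative sketch, assuming the 4.4BSD export_args
 * layout): to export a filesystem read-only to one client network,
 * the argument block handed to this routine would be filled in
 * roughly as follows ("net" and "mask" hold the client network
 * address and netmask):
 *
 *	struct export_args ea;
 *	struct sockaddr_in net, mask;
 *
 *	bzero(&ea, sizeof(ea));
 *	ea.ex_flags = MNT_EXPORTED | MNT_EXRDONLY;
 *	ea.ex_addr = (struct sockaddr *)&net;
 *	ea.ex_addrlen = net.sin_len;
 *	ea.ex_mask = (struct sockaddr *)&mask;
 *	ea.ex_masklen = mask.sin_len;
 *
 * Omitting the address (ex_addrlen == 0) installs the default export
 * entry instead, as the code above shows.
 */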
2254
2255 /* ARGSUSED */
2256 static int
2257 vfs_free_netcred(rn, w)
2258 struct radix_node *rn;
2259 caddr_t w;
2260 {
2261 register struct radix_node_head *rnh = (struct radix_node_head *)w;
2262
2263 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2264 _FREE((caddr_t)rn, M_NETADDR);
2265 return (0);
2266 }
2267
2268 /*
2269 * Free the net address hash lists that are hanging off the mount points.
2270 */
2271 static void
2272 vfs_free_addrlist(nep)
2273 struct netexport *nep;
2274 {
2275 register int i;
2276 register struct radix_node_head *rnh;
2277
2278 for (i = 0; i <= AF_MAX; i++)
2279 if (rnh = nep->ne_rtable[i]) {
2280 (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
2281 (caddr_t)rnh);
2282 _FREE((caddr_t)rnh, M_RTABLE);
2283 nep->ne_rtable[i] = 0;
2284 }
2285 }
2286
2287 int
2288 vfs_export(mp, nep, argp)
2289 struct mount *mp;
2290 struct netexport *nep;
2291 struct export_args *argp;
2292 {
2293 int error;
2294
2295 if (argp->ex_flags & MNT_DELEXPORT) {
2296 vfs_free_addrlist(nep);
2297 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2298 }
2299 if (argp->ex_flags & MNT_EXPORTED) {
2300 if (error = vfs_hang_addrlist(mp, nep, argp))
2301 return (error);
2302 mp->mnt_flag |= MNT_EXPORTED;
2303 }
2304 return (0);
2305 }
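/*
 * Example (illustrative sketch, following the classic BSD ffs_mount()
 * pattern, where um_export is the filesystem's per-mount netexport):
 * an MNT_UPDATE mount request that names no device is treated as an
 * export-list change and routed here:
 *
 *	if (mp->mnt_flag & MNT_UPDATE) {
 *		if (args.fspec == NULL)
 *			return (vfs_export(mp, &ump->um_export,
 *			    &args.export));
 *	}
 */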
2306
2307 struct netcred *
2308 vfs_export_lookup(mp, nep, nam)
2309 register struct mount *mp;
2310 struct netexport *nep;
2311 struct mbuf *nam;
2312 {
2313 register struct netcred *np;
2314 register struct radix_node_head *rnh;
2315 struct sockaddr *saddr;
2316
2317 np = NULL;
2318 if (mp->mnt_flag & MNT_EXPORTED) {
2319 /*
2320 * Lookup in the export list first.
2321 */
2322 if (nam != NULL) {
2323 saddr = mtod(nam, struct sockaddr *);
2324 rnh = nep->ne_rtable[saddr->sa_family];
2325 if (rnh != NULL) {
2326 np = (struct netcred *)
2327 (*rnh->rnh_matchaddr)((caddr_t)saddr,
2328 rnh);
2329 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2330 np = NULL;
2331 }
2332 }
2333 /*
2334 * If no address match, use the default if it exists.
2335 */
2336 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2337 np = &nep->ne_defexported;
2338 }
2339 return (np);
2340 }
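/*
 * Example (illustrative sketch, modeled on how a filesystem's fhtovp
 * routine backs the NFS server; "nep" is its per-mount netexport and
 * exflagsp/credanonp are the caller-provided result pointers): map
 * the client address in the mbuf "nam" to the export flags and
 * anonymous credential it was granted:
 *
 *	struct netcred *np;
 *
 *	np = vfs_export_lookup(mp, nep, nam);
 *	if (np == NULL)
 *		return (EACCES);
 *	*exflagsp = np->netc_exflags;
 *	*credanonp = &np->netc_anon;
 */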
2341
2342 /*
2343 * try to reclaim vnodes from the memory
2344 * object cache
2345 */
2346 int
2347 vm_object_cache_reclaim(int count)
2348 {
2349 int cnt;
2350 void vnode_pager_release_from_cache(int *);
2351
2352 /* attempt to reclaim vnodes from VM object cache */
2353 cnt = count;
2354 vnode_pager_release_from_cache(&cnt);
2355 return (cnt);
2356 }
2357
2358 /*
2359 * Release memory object reference held by inactive vnodes
2360 * and then try to reclaim some vnodes from the memory
2361 * object cache
2362 */
2363 int
2364 vnreclaim(int count)
2365 {
2366 int cnt, i, loopcnt;
2367 void *obj;
2368 struct vnode *vp;
2369 int err;
2370 struct proc *p;
2371
2372 i = 0;
2373 loopcnt = 0;
2374
2375 /* Try to release "count" vnodes from the inactive list */
2376 restart:
2377 if (++loopcnt > inactivevnodes) {
2378 /*
2379 * We did our best trying to reclaim the vnodes.
2380 * Do not try any more, as that would only lead to
2381 * long latencies; in the worst case this can
2382 * become totally CPU bound.
2383 * Just fall through and attempt a reclaim of the
2384 * VM object cache.
2385 */
2386 goto out;
2387 }
2388
2389 simple_lock(&vnode_free_list_slock);
2390 for (vp = TAILQ_FIRST(&vnode_inactive_list);
2391 (vp != NULLVP) && (i < count);
2392 vp = TAILQ_NEXT(vp, v_freelist)) {
2393
2394 if (simple_lock_try(&vp->v_interlock)) {
2395 if (vp->v_usecount != 1)
2396 panic("vnreclaim: v_usecount");
2397
2398 if (!UBCINFOEXISTS(vp)) {
2399 if (vp->v_type == VBAD) {
2400 VREMINACTIVE("vnreclaim", vp);
2401 simple_unlock(&vp->v_interlock);
2402 continue;
2403 } else
2404 panic("non UBC vnode on inactive list");
2405 /* Should not reach here */
2406 }
2407
2408 /* If vnode is already being reclaimed, wait */
2409 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
2410 vp->v_flag |= VXWANT;
2411 simple_unlock(&vp->v_interlock);
2412 simple_unlock(&vnode_free_list_slock);
2413 (void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
2414 goto restart;
2415 }
2416
2417 VREMINACTIVE("vnreclaim", vp);
2418 simple_unlock(&vnode_free_list_slock);
2419
2420 /* held vnodes must not be reclaimed */
2421 if (vp->v_ubcinfo->ui_holdcnt) { /* XXX */
2422 vinactive(vp);
2423 simple_unlock(&vp->v_interlock);
2424 goto restart;
2425 }
2426
2427 if (ubc_issetflags(vp, UI_WASMAPPED)) {
2428 /*
2429 * We should not reclaim as it is likely
2430 * to be in use. Let it die a natural death.
2431 * Release the UBC reference if one exists
2432 * and put it back at the tail.
2433 */
2434 if (ubc_issetflags(vp, UI_HASOBJREF)) {
2435 obj = ubc_getobject(vp, UBC_NOREACTIVATE);
2436 if (obj == NULL)
2437 panic("vnreclaim: null object");
2438 /* release the reference gained by ubc_info_init() */
2439 ubc_clearflags(vp, UI_HASOBJREF);
2440 simple_unlock(&vp->v_interlock);
2441 vm_object_deallocate(obj);
2442 /*
2443 * The vnode interlock was released and
2444 * vm_object_deallocate() might have blocked;
2445 * the object may have been terminated, or the
2446 * vnode may have been reactivated, in the
2447 * meantime. Evaluate the state again.
2448 */
2449 if (UBCINFOEXISTS(vp)) {
2450 simple_lock(&vp->v_interlock);
2451 if ((vp->v_usecount == 1) && !VONLIST(vp))
2452 vinactive(vp);
2453 simple_unlock(&vp->v_interlock);
2454 }
2455 } else {
2456 vinactive(vp);
2457 simple_unlock(&vp->v_interlock);
2458 }
2459 } else {
2460 VORECLAIM_ENABLE(vp);
2461
2462 /*
2463 * scrub the dirty pages and invalidate the buffers
2464 */
2465 p = current_proc();
2466 err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
2467 if (err) {
2468 /* cannot reclaim */
2469 simple_lock(&vp->v_interlock);
2470 vinactive(vp);
2471 VORECLAIM_DISABLE(vp);
2472 simple_unlock(&vp->v_interlock);
2473 goto restart;
2474 }
2475 simple_lock(&vp->v_interlock);
2476 if (vp->v_usecount != 1)
2477 panic("VOCR: usecount race");
2478 simple_unlock(&vp->v_interlock);
2479
2480 /*
2481 * If the UBC reference on the memory object
2482 * was already lost, regain it. This will
2483 * keep the memory object alive for the rest of
2484 * the reclaim; the reference is finally dropped
2485 * by memory_object_destroy().
2486 */
2487 obj = ubc_getobject(vp, (UBC_NOREACTIVATE|UBC_HOLDOBJECT));
2488 if (obj == (void *)NULL)
2489 panic("vnreclaim: null object");
2490
2491 /* clean up the state in VM without invalidating */
2492 (void)ubc_clean(vp, 0);
2493
2494 /* flush and invalidate buffers associated with the vnode */
2495 if (vp->v_tag == VT_NFS)
2496 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
2497 else
2498 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
2499
2500 /*
2501 * It is not necessary to call ubc_uncache()
2502 * here because memory_object_destroy() already
2503 * marks the memory object non-cacheable.
2504 *
2505 * We need to release the vnode lock before calling
2506 * vm_object_deallocate() to avoid a deadlock
2507 * when the vnode goes through vop_inactive.
2508 *
2509 * Note: for the v_usecount == 1 case, VOP_INACTIVE
2510 * has not yet been called. Call it now, while vp is
2511 * still locked; it will also release the lock.
2512 */
2513 if (vp->v_usecount == 1)
2514 VOP_INACTIVE(vp, p);
2515 else
2516 VOP_UNLOCK(vp, 0, p);
2517
2518 /*
2519 * This vnode is ready to be reclaimed.
2520 * Terminate the memory object.
2521 * memory_object_destroy() will result in
2522 * vnode_pager_no_senders().
2523 * That will release the pager reference
2524 * and the vnode will move to the free list.
2525 */
2526 if (ISSET(vp->v_flag, VTERMINATE))
2527 panic("vnreclaim: already terminating");
2528 SET(vp->v_flag, VTERMINATE);
2529
2530 memory_object_destroy(obj, 0);
2531
2532 /*
2533 * memory_object_destroy() is asynchronous with respect
2534 * to vnode_pager_no_senders().
2535 * Wait for vnode_pager_no_senders() to clear
2536 * VTERMINATE.
2537 */
2538 while (ISSET(vp->v_flag, VTERMINATE)) {
2539 SET(vp->v_flag, VTERMWANT);
2540 tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vnreclaim", 0);
2541 }
2542 simple_lock(&vp->v_interlock);
2543 VORECLAIM_DISABLE(vp);
2544 i++;
2545 simple_unlock(&vp->v_interlock);
2546 }
2547 /* inactive list lock was released, must restart */
2548 goto restart;
2549 }
2550 }
2551 simple_unlock(&vnode_free_list_slock);
2552
2553 vnode_reclaim_tried += i;
2554 out:
2555 i = vm_object_cache_reclaim(count);
2556 vnode_objects_reclaimed += i;
2557
2558 return (i);
2559 }
2560
2561 /*
2562 * This routine is called from vnode_pager_no_senders(),
2563 * which in turn can be called with the vnode locked by vnode_uncache().
2564 * It can also be called as a result of vm_object_cache_trim(),
2565 * in which case the lock state is unknown.
2566 * AGE the vnode so that it gets recycled quickly.
2567 * Check lock status to decide whether to call vput() or vrele().
2568 */
2569 void
2570 vnode_pager_vrele(struct vnode *vp)
2571 {
2572
2573 boolean_t funnel_state;
2574 int isvnreclaim = 1;
2575
2576 if (vp == (struct vnode *) NULL)
2577 panic("vnode_pager_vrele: null vp");
2578
2579 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2580
2581 /* Mark the vnode to be recycled */
2582 vagevp(vp);
2583
2584 simple_lock(&vp->v_interlock);
2585 /*
2586 * If a vgone (or vclean) is already in progress,
2587 * do not bother with the ubc_info cleanup;
2588 * let vclean deal with it.
2589 */
2590 if (vp->v_flag & VXLOCK) {
2591 CLR(vp->v_flag, VTERMINATE);
2592 if (ISSET(vp->v_flag, VTERMWANT)) {
2593 CLR(vp->v_flag, VTERMWANT);
2594 wakeup((caddr_t)&vp->v_ubcinfo);
2595 }
2596 simple_unlock(&vp->v_interlock);
2597 vrele(vp);
2598 (void) thread_funnel_set(kernel_flock, funnel_state);
2599 return;
2600 }
2601
2602 /* It's dead, Jim! */
2603 if (!ISSET(vp->v_flag, VORECLAIM)) {
2604 /*
2605 * Called as a result of eviction of the memory
2606 * object from the memory object cache.
2607 */
2608 isvnreclaim = 0;
2609
2610 /* So serialize vnode operations */
2611 VORECLAIM_ENABLE(vp);
2612 }
2613 if (!ISSET(vp->v_flag, VTERMINATE))
2614 SET(vp->v_flag, VTERMINATE);
2615 if (UBCINFOEXISTS(vp)) {
2616 if (ubc_issetflags(vp, UI_WASMAPPED))
2617 SET(vp->v_flag, VWASMAPPED);
2618
2619 if ((vp->v_ubcinfo->ui_holdcnt) /* XXX */
2620 && !(vp->v_flag & VXLOCK))
2621 panic("vnode_pager_vrele: freeing held ubc_info");
2622
2623 simple_unlock(&vp->v_interlock);
2624 ubc_info_free(vp);
2625 vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
2626 } else {
2627 if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
2628 && ((vp)->v_ubcinfo != UBC_NOINFO)) {
2629 simple_unlock(&vp->v_interlock);
2630 ubc_info_free(vp);
2631 vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
2632 } else {
2633 simple_unlock(&vp->v_interlock);
2634 }
2635 }
2636
2637 CLR(vp->v_flag, VTERMINATE);
2638
2639 if (vp->v_type != VBAD) {
2640 vgone(vp); /* revoke the vnode */
2641 vrele(vp); /* and drop the reference */
2642 } else
2643 vrele(vp);
2644
2645 if (ISSET(vp->v_flag, VTERMWANT)) {
2646 CLR(vp->v_flag, VTERMWANT);
2647 wakeup((caddr_t)&vp->v_ubcinfo);
2648 }
2649 if (!isvnreclaim)
2650 VORECLAIM_DISABLE(vp);
2651 (void) thread_funnel_set(kernel_flock, funnel_state);
2652 return;
2653 }
2654
2655
2656 #if DIAGNOSTIC
2657 int walk_vnodes_debug=0;
2658
2659 void
2660 walk_allvnodes()
2661 {
2663 struct mount *mp, *nmp;
2664 struct vnode *vp;
2665 int cnt = 0;
2666
2667 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2668 for (vp = mp->mnt_vnodelist.lh_first;
2669 vp != NULL;
2670 vp = vp->v_mntvnodes.le_next) {
2671 if ((vp->v_usecount < 0) && walk_vnodes_debug) {
2672 printf("vp is %x\n", vp);
2673 }
2676 }
2677 nmp = mp->mnt_list.cqe_next;
2678 }
2679 for (cnt = 0, vp = vnode_free_list.tqh_first;
2680 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
2681 if ((vp->v_usecount < 0) && walk_vnodes_debug) {
2682 printf("vp is %x\n", vp);
2683 }
2686 }
2687 printf("%d - free\n", cnt);
2688
2689 for (cnt = 0, vp = vnode_inactive_list.tqh_first;
2690 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
2691 if ((vp->v_usecount < 0) && walk_vnodes_debug) {
2692 printf("vp is %x\n", vp);
2693 }
2696 }
2697 printf("%d - inactive\n", cnt);
2698 }
2699 #endif /* DIAGNOSTIC */