1 /*
2 * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
26 /*
27 * Copyright (c) 1989, 1993
28 * The Regents of the University of California. All rights reserved.
29 * (c) UNIX System Laboratories, Inc.
30 * All or some portions of this file are derived from material licensed
31 * to the University of California by American Telephone and Telegraph
32 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
33 * the permission of UNIX System Laboratories, Inc.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
64 */
65
66 /*
67 * External virtual filesystem routines
68 */
69
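/* Force DIAGNOSTIC checks on for this file, regardless of the kernel configuration. */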
70 #undef DIAGNOSTIC
71 #define DIAGNOSTIC 1
72
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/proc.h>
76 #include <sys/mount.h>
77 #include <sys/time.h>
78 #include <sys/vnode.h>
79 #include <sys/stat.h>
80 #include <sys/namei.h>
81 #include <sys/ucred.h>
82 #include <sys/buf.h>
83 #include <sys/errno.h>
84 #include <sys/malloc.h>
85 #include <sys/domain.h>
86 #include <sys/mbuf.h>
87 #include <sys/syslog.h>
88 #include <sys/ubc.h>
89 #include <sys/vm.h>
90 #include <sys/sysctl.h>
91
92 #include <kern/assert.h>
93
94 #include <miscfs/specfs/specdev.h>
95
96 #include <mach/mach_types.h>
97 #include <mach/memory_object_types.h>
98
99
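/*
 * Conversion tables between the file type bits of a mode (S_IFMT)
 * and vnode types, and back again.
 */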
100 enum vtype iftovt_tab[16] = {
101 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
102 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
103 };
104 int vttoif_tab[9] = {
105 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
106 S_IFSOCK, S_IFIFO, S_IFMT,
107 };
108
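/*
 * Local helpers for the vnode free and inactive lists; adjust_vm_object_cache()
 * is provided by the VM layer to resize its object cache.
 */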
109 static void vfree(struct vnode *vp);
110 static void vinactive(struct vnode *vp);
111 static int vnreclaim(int count);
112 extern kern_return_t
113 adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
114
115 TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
116 TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */
117 struct mntlist mountlist; /* mounted filesystem list */
118
119 #if DIAGNOSTIC
120 #define VLISTCHECK(fun, vp, list) \
121 if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
122 panic("%s: %s vnode not on %slist", (fun), (list), (list));
123
124 #define VINACTIVECHECK(fun, vp, expected) \
125 do { \
126 int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE); \
127 if (__is_inactive ^ expected) \
128 panic("%s: %sinactive vnode, expected %s", (fun), \
129 __is_inactive? "" : "not ", \
130 expected? "inactive": "not inactive"); \
131 } while(0)
132 #else
133 #define VLISTCHECK(fun, vp, list)
134 #define VINACTIVECHECK(fun, vp, expected)
135 #endif /* DIAGNOSTIC */
136
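/*
 * A vnode is marked off-list by setting its freelist back pointer to the
 * 0xdeadb sentinel; VONLIST() tests for that sentinel.
 */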
137 #define VLISTNONE(vp) \
138 do { \
139 (vp)->v_freelist.tqe_next = (struct vnode *)0; \
140 (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb; \
141 } while(0)
142
143 #define VONLIST(vp) \
144 ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
145
146 /* remove a vnode from free vnode list */
147 #define VREMFREE(fun, vp) \
148 do { \
149 VLISTCHECK((fun), (vp), "free"); \
150 TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist); \
151 VLISTNONE((vp)); \
152 freevnodes--; \
153 } while(0)
154
155 /* remove a vnode from inactive vnode list */
156 #define VREMINACTIVE(fun, vp) \
157 do { \
158 VLISTCHECK((fun), (vp), "inactive"); \
159 VINACTIVECHECK((fun), (vp), VUINACTIVE); \
160 TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \
161 CLR((vp)->v_flag, VUINACTIVE); \
162 VLISTNONE((vp)); \
163 inactivevnodes--; \
164 } while(0)
165
166 #define VORECLAIM_ENABLE(vp) \
167 do { \
168 if (ISSET((vp)->v_flag, VORECLAIM)) \
169 panic("vm object reclaim already"); \
170 SET((vp)->v_flag, VORECLAIM); \
171 } while(0)
172
173 #define VORECLAIM_DISABLE(vp) \
174 do { \
175 CLR((vp)->v_flag, VORECLAIM); \
176 if (ISSET((vp)->v_flag, VXWANT)) { \
177 CLR((vp)->v_flag, VXWANT); \
178 wakeup((caddr_t)(vp)); \
179 } \
180 } while(0)
181
182 /*
183 * Have to declare first two locks as actual data even if !MACH_SLOCKS, since
184 * pointers to them get passed around.
185 */
186 simple_lock_data_t mountlist_slock;
187 simple_lock_data_t mntvnode_slock;
188 decl_simple_lock_data(,mntid_slock);
189 decl_simple_lock_data(,vnode_free_list_slock);
190 decl_simple_lock_data(,spechash_slock);
191
192 /*
193 * vnodetarget is the number of vnodes we expect to get back
194 * from the inactive vnode list and VM object cache.
195 * As vnreclaim() is mainly a cpu-bound operation, for faster
196 * processors this number could be higher.
197 * Having this number too high introduces longer delays in
198 * the execution of getnewvnode().
199 */
200 unsigned long vnodetarget; /* target for vnreclaim() */
201 #define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */
202
203 /*
204 * We need quite a few vnodes on the free list to sustain the
205 * rapid stat() calls a compilation process makes, and still benefit from the name
206 * cache. Having too few vnodes on the free list causes serious disk
207 * thrashing as we cycle through them.
208 */
209 #define VNODE_FREE_MIN 300 /* freelist should have at least this many */
210
211 /*
212 * We need to get vnodes back from the VM object cache when a certain #
213 * of vnodes are reused from the freelist. This is essential for the
214 * caching to be effective in the namecache and the buffer cache [for the
215 * metadata].
216 */
217 #define VNODE_TOOMANY_REUSED (VNODE_FREE_MIN/4)
218
219 /*
220 * If we have enough vnodes on the freelist we do not want to reclaim
221 * the vnodes from the VM object cache.
222 */
223 #define VNODE_FREE_ENOUGH (VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
224
225 /*
226 * Initialize the vnode management data structures.
227 */
228 __private_extern__ void
229 vntblinit()
230 {
231 extern struct lock__bsd__ exchangelock;
232
233 simple_lock_init(&mountlist_slock);
234 simple_lock_init(&mntvnode_slock);
235 simple_lock_init(&mntid_slock);
236 simple_lock_init(&spechash_slock);
237 TAILQ_INIT(&vnode_free_list);
238 simple_lock_init(&vnode_free_list_slock);
239 TAILQ_INIT(&vnode_inactive_list);
240 CIRCLEQ_INIT(&mountlist);
241 lockinit(&exchangelock, PVFS, "exchange", 0, 0);
242
243 if (!vnodetarget)
244 vnodetarget = VNODE_FREE_TARGET;
245
246 /*
247 * Scale the vm_object_cache to accommodate the vnodes
248 * we want to cache
249 */
250 (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
251 }
252
253 /* Reset the VM Object Cache with the values passed in */
254 __private_extern__ kern_return_t
255 reset_vmobjectcache(unsigned int val1, unsigned int val2)
256 {
257 vm_size_t oval = val1 - VNODE_FREE_MIN;
258 vm_size_t nval;
259
260 if(val2 < VNODE_FREE_MIN)
261 nval = 0;
262 else
263 nval = val2 - VNODE_FREE_MIN;
264
265 return(adjust_vm_object_cache(oval, nval));
266 }
267
268 /*
269 * Mark a mount point as busy. Used to synchronize access and to delay
270 * unmounting. Interlock is not released on failure.
271 */
272 int
273 vfs_busy(mp, flags, interlkp, p)
274 struct mount *mp;
275 int flags;
276 struct slock *interlkp;
277 struct proc *p;
278 {
279 int lkflags;
280
281 if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
282 if (flags & LK_NOWAIT)
283 return (ENOENT);
284 mp->mnt_kern_flag |= MNTK_MWAIT;
285 if (interlkp)
286 simple_unlock(interlkp);
287 /*
288 * Since all busy locks are shared except the exclusive
289 * lock granted when unmounting, the only place that a
290 * wakeup needs to be done is at the release of the
291 * exclusive lock at the end of dounmount.
292 */
293 sleep((caddr_t)mp, PVFS);
294 if (interlkp)
295 simple_lock(interlkp);
296 return (ENOENT);
297 }
298 lkflags = LK_SHARED;
299 if (interlkp)
300 lkflags |= LK_INTERLOCK;
301 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
302 panic("vfs_busy: unexpected lock failure");
303 return (0);
304 }
305
306 /*
307 * Free a busy filesystem.
308 */
309 void
310 vfs_unbusy(mp, p)
311 struct mount *mp;
312 struct proc *p;
313 {
314
315 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
316 }
317
318 /*
319 * Lookup a filesystem type, and if found allocate and initialize
320 * a mount structure for it.
321 *
322 * Devname is usually updated by mount(8) after booting.
323 */
324 int
325 vfs_rootmountalloc(fstypename, devname, mpp)
326 char *fstypename;
327 char *devname;
328 struct mount **mpp;
329 {
330 struct proc *p = current_proc(); /* XXX */
331 struct vfsconf *vfsp;
332 struct mount *mp;
333
334 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
335 if (!strcmp(vfsp->vfc_name, fstypename))
336 break;
337 if (vfsp == NULL)
338 return (ENODEV);
339 mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
340 bzero((char *)mp, (u_long)sizeof(struct mount));
341
342 /* Initialize the default IO constraints */
343 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
344 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
345
346 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
347 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
348 LIST_INIT(&mp->mnt_vnodelist);
349 mp->mnt_vfc = vfsp;
350 mp->mnt_op = vfsp->vfc_vfsops;
351 mp->mnt_flag = MNT_RDONLY;
352 mp->mnt_vnodecovered = NULLVP;
353 vfsp->vfc_refcount++;
354 mp->mnt_stat.f_type = vfsp->vfc_typenum;
355 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
356 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
357 mp->mnt_stat.f_mntonname[0] = '/';
358 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
359 *mpp = mp;
360 return (0);
361 }
362
363 /*
364 * Find an appropriate filesystem to use for the root. If a filesystem
365 * has not been preselected, walk through the list of known filesystems
366 * trying those that have mountroot routines until one
367 * works or we have tried them all.
368 */
369 int
370 vfs_mountroot()
371 {
372 struct vfsconf *vfsp;
373 extern int (*mountroot)(void);
374 int error;
375
376 if (mountroot != NULL) {
377 error = (*mountroot)();
378 return (error);
379 }
380
381 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
382 if (vfsp->vfc_mountroot == NULL)
383 continue;
384 if ((error = (*vfsp->vfc_mountroot)()) == 0)
385 return (0);
386 if (error != EINVAL)
387 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
388 }
389 return (ENODEV);
390 }
391
392 /*
393 * Lookup a mount point by filesystem identifier.
394 */
395 struct mount *
396 vfs_getvfs(fsid)
397 fsid_t *fsid;
398 {
399 register struct mount *mp;
400
401 simple_lock(&mountlist_slock);
402 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
403 mp = mp->mnt_list.cqe_next) {
404 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
405 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
406 simple_unlock(&mountlist_slock);
407 return (mp);
408 }
409 }
410 simple_unlock(&mountlist_slock);
411 return ((struct mount *)0);
412 }
413
414 /*
415 * Get a new unique fsid
416 */
417 void
418 vfs_getnewfsid(mp)
419 struct mount *mp;
420 {
421 static u_short xxxfs_mntid;
422
423 fsid_t tfsid;
424 int mtype;
425
426 simple_lock(&mntid_slock);
427 mtype = mp->mnt_vfc->vfc_typenum;
428 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
429 mp->mnt_stat.f_fsid.val[1] = mtype;
430 if (xxxfs_mntid == 0)
431 ++xxxfs_mntid;
432 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
433 tfsid.val[1] = mtype;
434 if (mountlist.cqh_first != (void *)&mountlist) {
435 while (vfs_getvfs(&tfsid)) {
436 tfsid.val[0]++;
437 xxxfs_mntid++;
438 }
439 }
440 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
441 simple_unlock(&mntid_slock);
442 }
443
444 /*
445 * Set vnode attributes to VNOVAL
446 */
447 void
448 vattr_null(vap)
449 register struct vattr *vap;
450 {
451
452 vap->va_type = VNON;
453 vap->va_size = vap->va_bytes = VNOVAL;
454 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
455 vap->va_fsid = vap->va_fileid =
456 vap->va_blocksize = vap->va_rdev =
457 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
458 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
459 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
460 vap->va_flags = vap->va_gen = VNOVAL;
461 vap->va_vaflags = 0;
462 }
463
464 /*
465 * Routines having to do with the management of the vnode table.
466 */
467 extern int (**dead_vnodeop_p)(void *);
468 static void vclean __P((struct vnode *vp, int flag, struct proc *p));
469 extern void vgonel __P((struct vnode *vp, struct proc *p));
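/* Counters: total vnodes allocated, on the free list, on the inactive list, and reclaim statistics. */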
470 long numvnodes, freevnodes;
471 long inactivevnodes;
472 long vnode_reclaim_tried;
473 long vnode_objects_reclaimed;
474
475
476 extern struct vattr va_null;
477
478 /*
479 * Return the next vnode from the free list.
480 */
481 int
482 getnewvnode(tag, mp, vops, vpp)
483 enum vtagtype tag;
484 struct mount *mp;
485 int (**vops)(void *);
486 struct vnode **vpp;
487 {
488 struct proc *p = current_proc(); /* XXX */
489 struct vnode *vp;
490 int cnt, didretry = 0;
491 static int reused = 0; /* track the reuse rate */
492 int reclaimhits = 0;
493
494 retry:
495 simple_lock(&vnode_free_list_slock);
496 /*
497 * MALLOC a vnode if the number of vnodes has not reached the desired
498 * value and the number on the free list is still reasonable...
499 * reuse from the freelist even though we may evict a name cache entry
500 * to reduce the number of vnodes that accumulate.... vnodes tie up
501 * wired memory and are never garbage collected
502 */
503 if (numvnodes < desiredvnodes && (freevnodes < (2 * VNODE_FREE_MIN))) {
504 numvnodes++;
505 simple_unlock(&vnode_free_list_slock);
506 MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
507 bzero((char *)vp, sizeof *vp);
508 VLISTNONE(vp); /* avoid double queue removal */
509 simple_lock_init(&vp->v_interlock);
510 goto done;
511 }
512
513 /*
514 * Once the desired number of vnodes is allocated,
515 * we start reusing the vnodes.
516 */
517 if (freevnodes < VNODE_FREE_MIN) {
518 /*
519 * if we are low on vnodes on the freelist attempt to get
520 * some back from the inactive list and VM object cache
521 */
522 simple_unlock(&vnode_free_list_slock);
523 (void)vnreclaim(vnodetarget);
524 simple_lock(&vnode_free_list_slock);
525 }
526 if (numvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED) {
527 reused = 0;
528 if (freevnodes < VNODE_FREE_ENOUGH) {
529 simple_unlock(&vnode_free_list_slock);
530 (void)vnreclaim(vnodetarget);
531 simple_lock(&vnode_free_list_slock);
532 }
533 }
534
535 for (cnt = 0, vp = vnode_free_list.tqh_first;
536 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
537 if (simple_lock_try(&vp->v_interlock)) {
538 /* got the interlock */
539 if (ISSET(vp->v_flag, VORECLAIM)) {
540 /* skip over the vnodes that are being reclaimed */
541 simple_unlock(&vp->v_interlock);
542 reclaimhits++;
543 } else
544 break;
545 }
546 }
547
548 /*
549 * Unless this is a bad time of the month, at most
550 * the first NCPUS items on the free list are
551 * locked, so this is close enough to being empty.
552 */
553 if (vp == NULLVP) {
554 simple_unlock(&vnode_free_list_slock);
555 if (!(didretry++) && (vnreclaim(vnodetarget) > 0))
556 goto retry;
557 tablefull("vnode");
558 log(LOG_EMERG, "%d vnodes locked, %d desired, %d numvnodes, "
559 "%d free, %d inactive, %d being reclaimed\n",
560 cnt, desiredvnodes, numvnodes, freevnodes, inactivevnodes,
561 reclaimhits);
562 *vpp = 0;
563 return (ENFILE);
564 }
565
566 if (vp->v_usecount)
567 panic("free vnode isn't: v_type = %d, v_usecount = %d?",
568 vp->v_type, vp->v_usecount);
569
570 VREMFREE("getnewvnode", vp);
571 reused++;
572 simple_unlock(&vnode_free_list_slock);
573 vp->v_lease = NULL;
574 cache_purge(vp);
575 if (vp->v_type != VBAD)
576 vgonel(vp, p); /* clean and reclaim the vnode */
577 else
578 simple_unlock(&vp->v_interlock);
579 #if DIAGNOSTIC
580 if (vp->v_data)
581 panic("cleaned vnode isn't");
582 {
583 int s = splbio();
584 if (vp->v_numoutput)
585 panic("Clean vnode has pending I/O's");
586 splx(s);
587 }
588 #endif
589 if (UBCINFOEXISTS(vp))
590 panic("getnewvnode: ubcinfo not cleaned");
591 else
592 vp->v_ubcinfo = 0;
593
594 vp->v_lastr = -1;
595 vp->v_ralen = 0;
596 vp->v_maxra = 0;
597 vp->v_lastw = 0;
598 vp->v_ciosiz = 0;
599 vp->v_cstart = 0;
600 vp->v_clen = 0;
601 vp->v_socket = 0;
602
603 done:
604 vp->v_flag = VSTANDARD;
605 vp->v_type = VNON;
606 vp->v_tag = tag;
607 vp->v_op = vops;
608 insmntque(vp, mp);
609 *vpp = vp;
610 vp->v_usecount = 1;
611 vp->v_data = 0;
612 return (0);
613 }
614
615 /*
616 * Move a vnode from one mount queue to another.
617 */
618 void
619 insmntque(vp, mp)
620 struct vnode *vp;
621 struct mount *mp;
622 {
623
624 simple_lock(&mntvnode_slock);
625 /*
626 * Delete from old mount point vnode list, if on one.
627 */
628 if (vp->v_mount != NULL)
629 LIST_REMOVE(vp, v_mntvnodes);
630 /*
631 * Insert into list of vnodes for the new mount point, if available.
632 */
633 if ((vp->v_mount = mp) != NULL)
634 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
635 simple_unlock(&mntvnode_slock);
636 }
637
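/*
 * Decrement the vnode's outstanding output (write) count and wake up
 * any thread sleeping on it (VBWAIT or VTHROTTLED) once it drains.
 */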
638 __inline void
639 vpwakeup(struct vnode *vp)
640 {
641 if (vp) {
642 if (--vp->v_numoutput < 0)
643 panic("vpwakeup: neg numoutput");
644 if ((vp->v_flag & VBWAIT || vp->v_flag & VTHROTTLED)
645 && vp->v_numoutput <= 0) {
646 vp->v_flag &= ~(VBWAIT|VTHROTTLED);
647 wakeup((caddr_t)&vp->v_numoutput);
648 }
649 }
650 }
651
652 /*
653 * Update outstanding I/O count and do wakeup if requested.
654 */
655 void
656 vwakeup(bp)
657 register struct buf *bp;
658 {
659 CLR(bp->b_flags, B_WRITEINPROG);
660 vpwakeup(bp->b_vp);
661 }
662
663 /*
664 * Flush out and invalidate all buffers associated with a vnode.
665 * Called with the underlying object locked.
666 */
667 int
668 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
669 register struct vnode *vp;
670 int flags;
671 struct ucred *cred;
672 struct proc *p;
673 int slpflag, slptimeo;
674 {
675 register struct buf *bp;
676 struct buf *nbp, *blist;
677 int s, error = 0;
678
679 if (flags & V_SAVE) {
680 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
681 return (error);
682 }
683
684 // XXXdbg - if there are dirty bufs, wait for 'em if they're busy
685 for (bp=vp->v_dirtyblkhd.lh_first; bp; bp=nbp) {
686 nbp = bp->b_vnbufs.le_next;
687 if (ISSET(bp->b_flags, B_BUSY)) {
688 SET(bp->b_flags, B_WANTED);
689 tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), "vinvalbuf", 0);
690 nbp = vp->v_dirtyblkhd.lh_first;
691 } else {
692 panic("vinvalbuf: dirty buf (vp 0x%x, bp 0x%x)", vp, bp);
693 }
694 }
695 }
696
697 for (;;) {
698 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
699 while (blist && blist->b_lblkno < 0)
700 blist = blist->b_vnbufs.le_next;
701 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
702 (flags & V_SAVEMETA))
703 while (blist && blist->b_lblkno < 0)
704 blist = blist->b_vnbufs.le_next;
705 if (!blist)
706 break;
707
708 for (bp = blist; bp; bp = nbp) {
709 nbp = bp->b_vnbufs.le_next;
710 if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
711 continue;
712 s = splbio();
713 if (ISSET(bp->b_flags, B_BUSY)) {
714 SET(bp->b_flags, B_WANTED);
715 error = tsleep((caddr_t)bp,
716 slpflag | (PRIBIO + 1), "vinvalbuf",
717 slptimeo);
718 splx(s);
719 if (error) {
720 return (error);
721 }
722 break;
723 }
724 bremfree(bp);
725 SET(bp->b_flags, B_BUSY);
726 splx(s);
727 /*
728 * XXX Since there are no node locks for NFS, I believe
729 * there is a slight chance that a delayed write will
730 * occur while sleeping just above, so check for it.
731 */
732 if (ISSET(bp->b_flags, B_DELWRI) && (flags & V_SAVE)) {
733 (void) VOP_BWRITE(bp);
734 break;
735 }
736
737 if (bp->b_flags & B_LOCKED) {
738 panic("vinvalbuf: bp @ 0x%x is locked!\n", bp);
739 break;
740 } else {
741 SET(bp->b_flags, B_INVAL);
742 }
743 brelse(bp);
744 }
745 }
746 if (!(flags & V_SAVEMETA) &&
747 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
748 panic("vinvalbuf: flush failed");
749 return (0);
750 }
751
752 /*
753 * Create a vnode for a block device.
754 * Used for root filesystem, argdev, and swap areas.
755 * Also used for memory file system special devices.
756 */
757 int
758 bdevvp(dev, vpp)
759 dev_t dev;
760 struct vnode **vpp;
761 {
762 register struct vnode *vp;
763 struct vnode *nvp;
764 int error;
765
766 if (dev == NODEV) {
767 *vpp = NULLVP;
768 return (ENODEV);
769 }
770 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
771 if (error) {
772 *vpp = NULLVP;
773 return (error);
774 }
775 vp = nvp;
776 vp->v_type = VBLK;
777 if (nvp = checkalias(vp, dev, (struct mount *)0)) {
778 vput(vp);
779 vp = nvp;
780 }
781 *vpp = vp;
782 return (0);
783 }
784
785 /*
786 * Check to see if the new vnode represents a special device
787 * for which we already have a vnode (either because of
788 * bdevvp() or because of a different vnode representing
789 * the same block device). If such an alias exists, deallocate
790 * the existing contents and return the aliased vnode. The
791 * caller is responsible for filling it with its new contents.
792 */
793 struct vnode *
794 checkalias(nvp, nvp_rdev, mp)
795 register struct vnode *nvp;
796 dev_t nvp_rdev;
797 struct mount *mp;
798 {
799 struct proc *p = current_proc(); /* XXX */
800 struct vnode *vp;
801 struct vnode **vpp;
802 struct specinfo * bufhold;
803 int buffree = 1;
804
805 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
806 return (NULLVP);
807
808 bufhold = (struct specinfo *)_MALLOC_ZONE(sizeof(struct specinfo),
809 M_VNODE, M_WAITOK);
810 vpp = &speclisth[SPECHASH(nvp_rdev)];
811 loop:
812 simple_lock(&spechash_slock);
813 for (vp = *vpp; vp; vp = vp->v_specnext) {
814 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
815 continue;
816 /*
817 * Alias, but not in use, so flush it out.
818 */
819 simple_lock(&vp->v_interlock);
820 if (vp->v_usecount == 0) {
821 simple_unlock(&spechash_slock);
822 vgonel(vp, p);
823 goto loop;
824 }
825 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
826 simple_unlock(&spechash_slock);
827 goto loop;
828 }
829 break;
830 }
831 if (vp == NULL || vp->v_tag != VT_NON) {
832 nvp->v_specinfo = bufhold;
833 buffree = 0; /* buffer used */
834 bzero(nvp->v_specinfo, sizeof(struct specinfo));
835 nvp->v_rdev = nvp_rdev;
836 nvp->v_hashchain = vpp;
837 nvp->v_specnext = *vpp;
838 nvp->v_specflags = 0;
839 simple_unlock(&spechash_slock);
840 *vpp = nvp;
841 if (vp != NULLVP) {
842 nvp->v_flag |= VALIASED;
843 vp->v_flag |= VALIASED;
844 vput(vp);
845 }
846 /* Since buffer is used just return */
847 return (NULLVP);
848 }
849 simple_unlock(&spechash_slock);
850 VOP_UNLOCK(vp, 0, p);
851 simple_lock(&vp->v_interlock);
852 vclean(vp, 0, p);
853 vp->v_op = nvp->v_op;
854 vp->v_tag = nvp->v_tag;
855 nvp->v_type = VNON;
856 insmntque(vp, mp);
857 if (buffree)
858 _FREE_ZONE((void *)bufhold, sizeof (struct specinfo), M_VNODE);
859 return (vp);
860 }
861
862 /*
863 * Get a reference on a particular vnode and lock it if requested.
864 * If the vnode was on the inactive list, remove it from the list.
865 * If the vnode was on the free list, remove it from the list and
866 * move it to inactive list as needed.
867 * The vnode lock bit is set if the vnode is being eliminated in
868 * vgone. The process is awakened when the transition is completed,
869 * and an error returned to indicate that the vnode is no longer
870 * usable (possibly having been changed to a new file system type).
871 */
872 int
873 vget(vp, flags, p)
874 struct vnode *vp;
875 int flags;
876 struct proc *p;
877 {
878 int error = 0;
879
880 retry:
881
882 /*
883 * If the vnode is in the process of being cleaned out for
884 * another use, we wait for the cleaning to finish and then
885 * return failure. Cleaning is determined by checking that
886 * the VXLOCK flag is set.
887 */
888 if ((flags & LK_INTERLOCK) == 0)
889 simple_lock(&vp->v_interlock);
890 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
891 vp->v_flag |= VXWANT;
892 simple_unlock(&vp->v_interlock);
893 (void)tsleep((caddr_t)vp, PINOD, "vget", 0);
894 return (ENOENT);
895 }
896
897 /*
898 * vnode is being terminated.
899 * wait for vnode_pager_no_senders() to clear VTERMINATE
900 */
901 if (ISSET(vp->v_flag, VTERMINATE)) {
902 SET(vp->v_flag, VTERMWANT);
903 simple_unlock(&vp->v_interlock);
904 (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
905 return (ENOENT);
906 }
907
908 /*
909 * if the vnode is being initialized,
910 * wait for it to finish initialization
911 */
912 if (ISSET(vp->v_flag, VUINIT)) {
913 if (ISSET(vp->v_flag, VUINIT)) {
914 SET(vp->v_flag, VUWANT);
915 simple_unlock(&vp->v_interlock);
916 (void) tsleep((caddr_t)vp, PINOD, "vget2", 0);
917 goto retry;
918 }
919 }
920
921 simple_lock(&vnode_free_list_slock);
922 if (vp->v_usecount == 0) {
923 /* If on the free list, remove it from there */
924 if (VONLIST(vp))
925 VREMFREE("vget", vp);
926 } else {
927 /* If on the inactive list, remove it from there */
928 if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
929 if (VONLIST(vp))
930 VREMINACTIVE("vget", vp);
931 }
932 }
933
934 /* The vnode should not be on the inactive list here */
935 VINACTIVECHECK("vget", vp, 0);
936
937 simple_unlock(&vnode_free_list_slock);
938
939 if (++vp->v_usecount <= 0)
940 panic("vget: v_usecount");
941
942 /*
943 * Recover named reference as needed
944 */
945 if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) {
946 simple_unlock(&vp->v_interlock);
947 if (ubc_getobject(vp, UBC_HOLDOBJECT)) {
948 error = ENOENT;
949 goto errout;
950 }
951 simple_lock(&vp->v_interlock);
952 }
953
954 if (flags & LK_TYPE_MASK) {
955 if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
956 goto errout;
957 return (0);
958 }
959
960 if ((flags & LK_INTERLOCK) == 0)
961 simple_unlock(&vp->v_interlock);
962 return (0);
963
964 errout:
965 /*
966 * If the vnode was not active in the first place,
967 * we must not call vrele() as VOP_INACTIVE() is not
968 * required.
969 * So the relevant part of vrele() is inlined here.
970 */
971 simple_lock(&vp->v_interlock);
972 if (--vp->v_usecount == 1) {
973 if (UBCINFOEXISTS(vp)) {
974 vinactive(vp);
975 simple_unlock(&vp->v_interlock);
976 return (error);
977 }
978 }
979 if (vp->v_usecount > 0) {
980 simple_unlock(&vp->v_interlock);
981 return (error);
982 }
983 if (vp->v_usecount < 0)
984 panic("vget: negative usecount (%d)", vp->v_usecount);
985 vfree(vp);
986 simple_unlock(&vp->v_interlock);
987 return (error);
988 }
989
990 /*
991 * Get a pager reference on the particular vnode.
992 *
993 * This is called from ubc_info_init() and it is assumed that
994 * the vnode is neither on the free list nor on the inactive list.
995 * It is also assumed that the vnode is neither being recycled
996 * by vgonel nor being terminated by vnode_pager_vrele().
997 *
998 * The vnode interlock is NOT held by the caller.
999 */
1000 __private_extern__ int
1001 vnode_pager_vget(vp)
1002 struct vnode *vp;
1003 {
1004 simple_lock(&vp->v_interlock);
1005 if (UBCINFOMISSING(vp))
1006 panic("vnode_pager_vget: stolen ubc_info");
1007
1008 if (!UBCINFOEXISTS(vp))
1009 panic("vnode_pager_vget: lost ubc_info");
1010
1011 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM))
1012 panic("vnode_pager_vget: already being reclaimed");
1013
1014 if (ISSET(vp->v_flag, VTERMINATE))
1015 panic("vnode_pager_vget: already being terminated");
1016
1017 simple_lock(&vnode_free_list_slock);
1018 /* The vnode should not be on ANY list */
1019 if (VONLIST(vp))
1020 panic("vnode_pager_vget: still on the list");
1021
1022 /* The vnode should not be on the inactive list here */
1023 VINACTIVECHECK("vnode_pager_vget", vp, 0);
1024 simple_unlock(&vnode_free_list_slock);
1025
1026 /* After all those checks, now do the real work :-) */
1027 if (++vp->v_usecount <= 0)
1028 panic("vnode_pager_vget: v_usecount");
1029 simple_unlock(&vp->v_interlock);
1030
1031 return (0);
1032 }
1033
1034 /*
1035 * Stubs to use when there is no locking to be done on the underlying object.
1036 * A minimal shared lock is necessary to ensure that the underlying object
1037 * is not revoked while an operation is in progress. So, an active shared
1038 * count is maintained in an auxiliary vnode lock structure.
1039 */
1040 int
1041 vop_nolock(ap)
1042 struct vop_lock_args /* {
1043 struct vnode *a_vp;
1044 int a_flags;
1045 struct proc *a_p;
1046 } */ *ap;
1047 {
1048 #ifdef notyet
1049 /*
1050 * This code cannot be used until all the non-locking filesystems
1051 * (notably NFS) are converted to properly lock and release nodes.
1052 * Also, certain vnode operations change the locking state within
1053 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
1054 * and symlink). Ideally these operations should not change the
1055 * lock state, but should be changed to let the caller of the
1056 * function unlock them. Otherwise all intermediate vnode layers
1057 * (such as union, umapfs, etc) must catch these functions to do
1058 * the necessary locking at their layer. Note that the inactive
1059 * and lookup operations also change their lock state, but this
1060 * cannot be avoided, so these two operations will always need
1061 * to be handled in intermediate layers.
1062 */
1063 struct vnode *vp = ap->a_vp;
1064 int vnflags, flags = ap->a_flags;
1065
1066 if (vp->v_vnlock == NULL) {
1067 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1068 return (0);
1069 MALLOC_ZONE(vp->v_vnlock, struct lock__bsd__ *,
1070 sizeof(struct lock__bsd__), M_VNODE, M_WAITOK);
1071 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1072 }
1073 switch (flags & LK_TYPE_MASK) {
1074 case LK_DRAIN:
1075 vnflags = LK_DRAIN;
1076 break;
1077 case LK_EXCLUSIVE:
1078 case LK_SHARED:
1079 vnflags = LK_SHARED;
1080 break;
1081 case LK_UPGRADE:
1082 case LK_EXCLUPGRADE:
1083 case LK_DOWNGRADE:
1084 return (0);
1085 case LK_RELEASE:
1086 default:
1087 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
1088 }
1089 if (flags & LK_INTERLOCK)
1090 vnflags |= LK_INTERLOCK;
1091 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
1092 #else /* for now */
1093 /*
1094 * Since we are not using the lock manager, we must clear
1095 * the interlock here.
1096 */
1097 if (ap->a_flags & LK_INTERLOCK)
1098 simple_unlock(&ap->a_vp->v_interlock);
1099 return (0);
1100 #endif
1101 }
1102
1103 /*
1104 * Decrement the active use count.
1105 */
1106 int
1107 vop_nounlock(ap)
1108 struct vop_unlock_args /* {
1109 struct vnode *a_vp;
1110 int a_flags;
1111 struct proc *a_p;
1112 } */ *ap;
1113 {
1114 struct vnode *vp = ap->a_vp;
1115
1116 if (vp->v_vnlock == NULL)
1117 return (0);
1118 return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
1119 }
1120
1121 /*
1122 * Return whether or not the node is in use.
1123 */
1124 int
1125 vop_noislocked(ap)
1126 struct vop_islocked_args /* {
1127 struct vnode *a_vp;
1128 } */ *ap;
1129 {
1130 struct vnode *vp = ap->a_vp;
1131
1132 if (vp->v_vnlock == NULL)
1133 return (0);
1134 return (lockstatus(vp->v_vnlock));
1135 }
1136
1137 /*
1138 * Vnode reference.
1139 */
1140 void
1141 vref(vp)
1142 struct vnode *vp;
1143 {
1144
1145 simple_lock(&vp->v_interlock);
1146 if (vp->v_usecount <= 0)
1147 panic("vref used where vget required");
1148
1149 /* If on the inactive list, remove it from there */
1150 if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
1151 if (VONLIST(vp)) {
1152 simple_lock(&vnode_free_list_slock);
1153 VREMINACTIVE("vref", vp);
1154 simple_unlock(&vnode_free_list_slock);
1155 }
1156 }
1157 /* The vnode should not be on the inactive list here */
1158 VINACTIVECHECK("vref", vp, 0);
1159
1160 if (++vp->v_usecount <= 0)
1161 panic("vref v_usecount");
1162 simple_unlock(&vp->v_interlock);
1163 }
1164
1165 /*
1166 * put the vnode on the appropriate free list.
1167 * called with v_interlock held.
1168 */
1169 static void
1170 vfree(vp)
1171 struct vnode *vp;
1172 {
1173 /*
1174 * if the vnode was not obtained by calling getnewvnode() we
1175 * are not responsible for the cleanup. Just return.
1176 */
1177 if (!(vp->v_flag & VSTANDARD)) {
1178 return;
1179 }
1180
1181 if (vp->v_usecount != 0)
1182 panic("vfree: v_usecount");
1183
1184 /* insert at tail of LRU list or at head if VAGE is set */
1185 simple_lock(&vnode_free_list_slock);
1186
1187 if (VONLIST(vp))
1188 panic("vfree: vnode still on list");
1189
1190 if (vp->v_flag & VAGE) {
1191 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1192 vp->v_flag &= ~VAGE;
1193 } else
1194 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1195 freevnodes++;
1196 simple_unlock(&vnode_free_list_slock);
1197 return;
1198 }
1199
1200 /*
1201 * put the vnode on the inactive list.
1202 * called with v_interlock held
1203 */
1204 static void
1205 vinactive(vp)
1206 struct vnode *vp;
1207 {
1208 if (!UBCINFOEXISTS(vp))
1209 panic("vinactive: not a UBC vnode");
1210
1211 if (vp->v_usecount != 1)
1212 panic("vinactive: v_usecount");
1213
1214 simple_lock(&vnode_free_list_slock);
1215
1216 if (VONLIST(vp))
1217 panic("vinactive: vnode still on list");
1218 VINACTIVECHECK("vinactive", vp, 0);
1219
1220 TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
1221 SET(vp->v_flag, VUINACTIVE);
1222 CLR(vp->v_flag, (VNOCACHE_DATA | VRAOFF));
1223
1224 inactivevnodes++;
1225 simple_unlock(&vnode_free_list_slock);
1226 return;
1227 }
1228
1229
1230 /*
1231 * vput(), just unlock and vrele()
1232 */
1233 void
1234 vput(vp)
1235 struct vnode *vp;
1236 {
1237 struct proc *p = current_proc(); /* XXX */
1238
1239 simple_lock(&vp->v_interlock);
1240 if (--vp->v_usecount == 1) {
1241 if (UBCINFOEXISTS(vp)) {
1242 vinactive(vp);
1243 simple_unlock(&vp->v_interlock);
1244 VOP_UNLOCK(vp, 0, p);
1245 return;
1246 }
1247 }
1248 if (vp->v_usecount > 0) {
1249 simple_unlock(&vp->v_interlock);
1250 VOP_UNLOCK(vp, 0, p);
1251 return;
1252 }
1253 #if DIAGNOSTIC
1254 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1255 vprint("vput: bad ref count", vp);
1256 panic("vput: v_usecount = %d, v_writecount = %d",
1257 vp->v_usecount, vp->v_writecount);
1258 }
1259 #endif
1260 if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1261 VREMINACTIVE("vrele", vp);
1262
1263 simple_unlock(&vp->v_interlock);
1264 VOP_INACTIVE(vp, p);
1265 /*
1266 * The interlock is not held and
1267 * VOP_INACTIVE releases the vnode lock.
1268 * We could block and the vnode might get reactivated.
1269 * We cannot just call vfree() without checking the state.
1270 */
1271 simple_lock(&vp->v_interlock);
1272 if (!VONLIST(vp)) {
1273 if (vp->v_usecount == 0)
1274 vfree(vp);
1275 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1276 vinactive(vp);
1277 }
1278 simple_unlock(&vp->v_interlock);
1279 }
1280
1281 /*
1282 * Vnode release.
1283 * If count drops to zero, call inactive routine and return to freelist.
1284 */
1285 void
1286 vrele(vp)
1287 struct vnode *vp;
1288 {
1289 struct proc *p = current_proc(); /* XXX */
1290
1291 simple_lock(&vp->v_interlock);
1292 if (--vp->v_usecount == 1) {
1293 if (UBCINFOEXISTS(vp)) {
1294 vinactive(vp);
1295 simple_unlock(&vp->v_interlock);
1296 return;
1297 }
1298 }
1299 if (vp->v_usecount > 0) {
1300 simple_unlock(&vp->v_interlock);
1301 return;
1302 }
1303 #if DIAGNOSTIC
1304 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1305 vprint("vrele: bad ref count", vp);
1306 panic("vrele: ref cnt");
1307 }
1308 #endif
1309 if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1310 VREMINACTIVE("vrele", vp);
1311
1312
1313 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
1314 /* vnode is being cleaned, just return */
1315 vfree(vp);
1316 simple_unlock(&vp->v_interlock);
1317 return;
1318 }
1319
1320 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1321 VOP_INACTIVE(vp, p);
1322 /*
1323 * vn_lock releases the interlock and
1324 * VOP_INACTIVE releases the vnode lock.
1325 * We could block and the vnode might get reactivated.
1326 * We cannot just call vfree() without checking the state.
1327 */
1328 simple_lock(&vp->v_interlock);
1329 if (!VONLIST(vp)) {
1330 if (vp->v_usecount == 0)
1331 vfree(vp);
1332 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1333 vinactive(vp);
1334 }
1335 simple_unlock(&vp->v_interlock);
1336 }
1337 #if 0
1338 else {
1339 vfree(vp);
1340 simple_unlock(&vp->v_interlock);
1341 kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp);
1342 }
1343 #endif
1344 }
1345
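/*
 * Mark a vnode as aged (VAGE) so vfree() will insert it at the head of
 * the free list, making it a preferred candidate for reuse.
 */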
1346 void
1347 vagevp(vp)
1348 struct vnode *vp;
1349 {
1350 simple_lock(&vp->v_interlock);
1351 vp->v_flag |= VAGE;
1352 simple_unlock(&vp->v_interlock);
1353 return;
1354 }
1355
1356 /*
1357 * Page or buffer structure gets a reference.
1358 */
1359 void
1360 vhold(vp)
1361 register struct vnode *vp;
1362 {
1363
1364 simple_lock(&vp->v_interlock);
1365 vp->v_holdcnt++;
1366 simple_unlock(&vp->v_interlock);
1367 }
1368
1369 /*
1370 * Page or buffer structure frees a reference.
1371 */
1372 void
1373 holdrele(vp)
1374 register struct vnode *vp;
1375 {
1376
1377 simple_lock(&vp->v_interlock);
1378 if (vp->v_holdcnt <= 0)
1379 panic("holdrele: holdcnt");
1380 vp->v_holdcnt--;
1381 simple_unlock(&vp->v_interlock);
1382 }
1383
1384 /*
1385 * Remove any vnodes in the vnode table belonging to mount point mp.
1386 *
1387 * If MNT_NOFORCE is specified, there should not be any active ones,
1388 * return error if any are found (nb: this is a user error, not a
1389 * system error). If MNT_FORCE is specified, detach any active vnodes
1390 * that are found.
1391 */
1392 #if DIAGNOSTIC
1393 int busyprt = 0; /* print out busy vnodes */
1394 #if 0
1395 struct ctldebug debug1 = { "busyprt", &busyprt };
1396 #endif /* 0 */
1397 #endif
1398
1399 int
1400 vflush(mp, skipvp, flags)
1401 struct mount *mp;
1402 struct vnode *skipvp;
1403 int flags;
1404 {
1405 struct proc *p = current_proc();
1406 struct vnode *vp, *nvp;
1407 int busy = 0;
1408
1409 simple_lock(&mntvnode_slock);
1410 loop:
1411 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1412 if (vp->v_mount != mp)
1413 goto loop;
1414 nvp = vp->v_mntvnodes.le_next;
1415 /*
1416 * Skip over a selected vnode.
1417 */
1418 if (vp == skipvp)
1419 continue;
1420
1421 simple_lock(&vp->v_interlock);
1422 /*
1423 * Skip over vnodes marked VSYSTEM or VNOFLUSH.
1424 */
1425 if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) {
1426 simple_unlock(&vp->v_interlock);
1427 continue;
1428 }
1429 /*
1430 * Skip over vnodes marked VSWAP.
1431 */
1432 if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
1433 simple_unlock(&vp->v_interlock);
1434 continue;
1435 }
1436 /*
1437 * If WRITECLOSE is set, only flush out regular file
1438 * vnodes open for writing.
1439 */
1440 if ((flags & WRITECLOSE) &&
1441 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1442 simple_unlock(&vp->v_interlock);
1443 continue;
1444 }
1445 /*
1446 * With v_usecount == 0, all we need to do is clear
1447 * out the vnode data structures and we are done.
1448 */
1449 if (vp->v_usecount == 0) {
1450 simple_unlock(&mntvnode_slock);
1451 vgonel(vp, p);
1452 simple_lock(&mntvnode_slock);
1453 continue;
1454 }
1455 /*
1456 * If FORCECLOSE is set, forcibly close the vnode.
1457 * For block or character devices, revert to an
1458 * anonymous device. For all other files, just kill them.
1459 */
1460 if (flags & FORCECLOSE) {
1461 simple_unlock(&mntvnode_slock);
1462 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1463 vgonel(vp, p);
1464 } else {
1465 vclean(vp, 0, p);
1466 vp->v_op = spec_vnodeop_p;
1467 insmntque(vp, (struct mount *)0);
1468 }
1469 simple_lock(&mntvnode_slock);
1470 continue;
1471 }
1472 #if DIAGNOSTIC
1473 if (busyprt)
1474 vprint("vflush: busy vnode", vp);
1475 #endif
1476 simple_unlock(&vp->v_interlock);
1477 busy++;
1478 }
1479 simple_unlock(&mntvnode_slock);
1480 if (busy && ((flags & FORCECLOSE)==0))
1481 return (EBUSY);
1482 return (0);
1483 }
1484
1485 /*
1486 * Disassociate the underlying file system from a vnode.
1487 * The vnode interlock is held on entry.
1488 */
1489 static void
1490 vclean(vp, flags, p)
1491 struct vnode *vp;
1492 int flags;
1493 struct proc *p;
1494 {
1495 int active;
1496 int removed = 0;
1497 int didhold;
1498
1499 /*
1500 * if the vnode was not obtained by calling getnewvnode() we
1501 * are not responsible for the cleanup. Just return.
1502 */
1503 if (!(vp->v_flag & VSTANDARD)) {
1504 simple_unlock(&vp->v_interlock);
1505 return;
1506 }
1507
1508 /*
1509 * Check to see if the vnode is in use.
1510 * If so we have to reference it before we clean it out
1511 * so that its count cannot fall to zero and generate a
1512 * race against ourselves to recycle it.
1513 */
1514 if (active = vp->v_usecount)
1515 if (++vp->v_usecount <= 0)
1516 panic("vclean: v_usecount");
1517 /*
1518 * Prevent the vnode from being recycled or
1519 * brought into use while we clean it out.
1520 */
1521 if (vp->v_flag & VXLOCK)
1522 panic("vclean: deadlock");
1523 vp->v_flag |= VXLOCK;
1524
1525 /*
1526 * Even if the count is zero, the VOP_INACTIVE routine may still
1527 * have the object locked while it cleans it out. The VOP_LOCK
1528 * ensures that the VOP_INACTIVE routine is done with its work.
1529 * For active vnodes, it ensures that no other activity can
1530 * occur while the underlying object is being cleaned out.
1531 */
1532 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1533
1534 /*
1535 * if this vnode is on the inactive list
1536 * take it off the list.
1537 */
1538 if ((active == 1) &&
1539 (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))) {
1540 simple_lock(&vnode_free_list_slock);
1541 VREMINACTIVE("vclean", vp);
1542 simple_unlock(&vnode_free_list_slock);
1543 removed++;
1544 }
1545
1546 /* Close the file if requested. */
1547 if (active && (flags & DOCLOSE))
1548 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1549
1550 /* Clean the pages in VM. */
1551 didhold = ubc_hold(vp);
1552 if ((active) && (didhold))
1553 (void)ubc_clean(vp, 0); /* do not invalidate */
1554
1555 /*
1556 * Clean out any buffers associated with the vnode.
1557 */
1558 if (flags & DOCLOSE) {
1559 if (vp->v_tag == VT_NFS)
1560 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
1561 else
1562 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1563 }
1564
1565 if (active)
1566 VOP_INACTIVE(vp, p);
1567 else
1568 VOP_UNLOCK(vp, 0, p);
1569
1570 /* Destroy ubc named reference */
1571 if (didhold) {
1572 ubc_rele(vp);
1573 ubc_destroy_named(vp);
1574 }
1575
1576 /*
1577 * Reclaim the vnode.
1578 */
1579 if (VOP_RECLAIM(vp, p))
1580 panic("vclean: cannot reclaim");
1581 cache_purge(vp);
1582 if (vp->v_vnlock) {
1583 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1584 vprint("vclean: lock not drained", vp);
1585 FREE_ZONE(vp->v_vnlock, sizeof (struct lock__bsd__), M_VNODE);
1586 vp->v_vnlock = NULL;
1587 }
1588
1589 /* It's dead, Jim! */
1590 vp->v_op = dead_vnodeop_p;
1591 vp->v_tag = VT_NON;
1592
1593 /*
1594 * Done with purge, notify sleepers of the grim news.
1595 */
1596 vp->v_flag &= ~VXLOCK;
1597 if (vp->v_flag & VXWANT) {
1598 vp->v_flag &= ~VXWANT;
1599 wakeup((caddr_t)vp);
1600 }
1601
1602 if (active)
1603 vrele(vp);
1604 }
1605
1606 /*
1607 * Eliminate all activity associated with the requested vnode
1608 * and with all vnodes aliased to the requested vnode.
1609 */
1610 int
1611 vop_revoke(ap)
1612 struct vop_revoke_args /* {
1613 struct vnode *a_vp;
1614 int a_flags;
1615 } */ *ap;
1616 {
1617 struct vnode *vp, *vq;
1618 struct proc *p = current_proc();
1619
1620 #if DIAGNOSTIC
1621 if ((ap->a_flags & REVOKEALL) == 0)
1622 panic("vop_revoke");
1623 #endif
1624
1625 vp = ap->a_vp;
1626 simple_lock(&vp->v_interlock);
1627
1628 if (vp->v_flag & VALIASED) {
1629 /*
1630 * If a vgone (or vclean) is already in progress,
1631 * wait until it is done and return.
1632 */
1633 if (vp->v_flag & VXLOCK) {
1634 while (vp->v_flag & VXLOCK) {
1635 vp->v_flag |= VXWANT;
1636 simple_unlock(&vp->v_interlock);
1637 (void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1638 }
1639 return (0);
1640 }
1641 /*
1642 * Ensure that vp will not be vgone'd while we
1643 * are eliminating its aliases.
1644 */
1645 vp->v_flag |= VXLOCK;
1646 simple_unlock(&vp->v_interlock);
1647 while (vp->v_flag & VALIASED) {
1648 simple_lock(&spechash_slock);
1649 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1650 if (vq->v_rdev != vp->v_rdev ||
1651 vq->v_type != vp->v_type || vp == vq)
1652 continue;
1653 simple_unlock(&spechash_slock);
1654 vgone(vq);
1655 break;
1656 }
1657 if (vq == NULLVP)
1658 simple_unlock(&spechash_slock);
1659 }
1660 /*
1661 * Remove the lock so that vgone below will
1662 * really eliminate the vnode after which time
1663 * vgone will awaken any sleepers.
1664 */
1665 simple_lock(&vp->v_interlock);
1666 vp->v_flag &= ~VXLOCK;
1667 }
1668 vgonel(vp, p);
1669 return (0);
1670 }
1671
1672 /*
1673 * Recycle an unused vnode to the front of the free list.
1674 * Release the passed interlock if the vnode will be recycled.
1675 */
1676 int
1677 vrecycle(vp, inter_lkp, p)
1678 struct vnode *vp;
1679 struct slock *inter_lkp;
1680 struct proc *p;
1681 {
1682
1683 simple_lock(&vp->v_interlock);
1684 if (vp->v_usecount == 0) {
1685 if (inter_lkp)
1686 simple_unlock(inter_lkp);
1687 vgonel(vp, p);
1688 return (1);
1689 }
1690 simple_unlock(&vp->v_interlock);
1691 return (0);
1692 }
1693
1694 /*
1695 * Eliminate all activity associated with a vnode
1696 * in preparation for reuse.
1697 */
1698 void
1699 vgone(vp)
1700 struct vnode *vp;
1701 {
1702 struct proc *p = current_proc();
1703
1704 simple_lock(&vp->v_interlock);
1705 vgonel(vp, p);
1706 }
1707
1708 /*
1709 * vgone, with the vp interlock held.
1710 */
1711 void
1712 vgonel(vp, p)
1713 struct vnode *vp;
1714 struct proc *p;
1715 {
1716 struct vnode *vq;
1717 struct vnode *vx;
1718
1719 /*
1720 * if the vnode was not obtained by calling getnewvnode() we
1721 * are not responsible for the cleanup. Just return.
1722 */
1723 if (!(vp->v_flag & VSTANDARD)) {
1724 simple_unlock(&vp->v_interlock);
1725 return;
1726 }
1727
1728 /*
1729 * If a vgone (or vclean) is already in progress,
1730 * wait until it is done and return.
1731 */
1732 if (vp->v_flag & VXLOCK) {
1733 while (vp->v_flag & VXLOCK) {
1734 vp->v_flag |= VXWANT;
1735 simple_unlock(&vp->v_interlock);
1736 (void)tsleep((caddr_t)vp, PINOD, "vgone", 0);
1737 }
1738 return;
1739 }
1740 /*
1741 * Clean out the filesystem specific data.
1742 */
1743 vclean(vp, DOCLOSE, p);
1744 /*
1745 * Delete from old mount point vnode list, if on one.
1746 */
1747 if (vp->v_mount != NULL)
1748 insmntque(vp, (struct mount *)0);
1749 /*
1750 * If special device, remove it from special device alias list
1751 * if it is on one.
1752 */
1753 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1754 simple_lock(&spechash_slock);
1755 if (*vp->v_hashchain == vp) {
1756 *vp->v_hashchain = vp->v_specnext;
1757 } else {
1758 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1759 if (vq->v_specnext != vp)
1760 continue;
1761 vq->v_specnext = vp->v_specnext;
1762 break;
1763 }
1764 if (vq == NULL)
1765 panic("missing bdev");
1766 }
1767 if (vp->v_flag & VALIASED) {
1768 vx = NULL;
1769 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1770 if (vq->v_rdev != vp->v_rdev ||
1771 vq->v_type != vp->v_type)
1772 continue;
1773 if (vx)
1774 break;
1775 vx = vq;
1776 }
1777 if (vx == NULL)
1778 panic("missing alias");
1779 if (vq == NULL)
1780 vx->v_flag &= ~VALIASED;
1781 vp->v_flag &= ~VALIASED;
1782 }
1783 simple_unlock(&spechash_slock);
1784 FREE_ZONE(vp->v_specinfo, sizeof (struct specinfo), M_VNODE);
1785 vp->v_specinfo = NULL;
1786 }
1787 /*
1788 * If it is on the freelist and not already at the head,
1789 * move it to the head of the list. The test of the back
1790 * pointer and the reference count of zero is because
1791 * it will be removed from the free list by getnewvnode,
1792 * but will not have its reference count incremented until
1793 * after calling vgone. If the reference count were
1794 * incremented first, vgone would (incorrectly) try to
1795 * close the previous instance of the underlying object.
1796 * So, the back pointer is explicitly set to `0xdeadb' in
1797 * getnewvnode after removing it from the freelist to ensure
1798 * that we do not try to move it here.
1799 */
1800 if (vp->v_usecount == 0) {
1801 simple_lock(&vnode_free_list_slock);
1802 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1803 vnode_free_list.tqh_first != vp) {
1804 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1805 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1806 }
1807 simple_unlock(&vnode_free_list_slock);
1808 }
1809 vp->v_type = VBAD;
1810 }
1811
1812 /*
1813 * Lookup a vnode by device number.
1814 */
1815 int
1816 vfinddev(dev, type, vpp)
1817 dev_t dev;
1818 enum vtype type;
1819 struct vnode **vpp;
1820 {
1821 struct vnode *vp;
1822 int rc = 0;
1823
1824 simple_lock(&spechash_slock);
1825 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1826 if (dev != vp->v_rdev || type != vp->v_type)
1827 continue;
1828 *vpp = vp;
1829 rc = 1;
1830 break;
1831 }
1832 simple_unlock(&spechash_slock);
1833 return (rc);
1834 }
1835
1836 /*
1837 * Calculate the total number of references to a special device.
1838 */
1839 int
1840 vcount(vp)
1841 struct vnode *vp;
1842 {
1843 struct vnode *vq, *vnext;
1844 int count;
1845
1846 loop:
1847 if ((vp->v_flag & VALIASED) == 0)
1848 return (vp->v_usecount);
1849 simple_lock(&spechash_slock);
1850 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1851 vnext = vq->v_specnext;
1852 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1853 continue;
1854 /*
1855 * Alias, but not in use, so flush it out.
1856 */
1857 if (vq->v_usecount == 0 && vq != vp) {
1858 simple_unlock(&spechash_slock);
1859 vgone(vq);
1860 goto loop;
1861 }
1862 count += vq->v_usecount;
1863 }
1864 simple_unlock(&spechash_slock);
1865 return (count);
1866 }
1867
1868 int prtactive = 0; /* 1 => print out reclaim of active vnodes */
1869
1870 /*
1871 * Print out a description of a vnode.
1872 */
1873 static char *typename[] =
1874 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1875
1876 void
1877 vprint(label, vp)
1878 char *label;
1879 register struct vnode *vp;
1880 {
1881 char buf[64];
1882
1883 if (label != NULL)
1884 printf("%s: ", label);
1885 printf("type %s, usecount %d, writecount %d, refcount %d,",
1886 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1887 vp->v_holdcnt);
1888 buf[0] = '\0';
1889 if (vp->v_flag & VROOT)
1890 strcat(buf, "|VROOT");
1891 if (vp->v_flag & VTEXT)
1892 strcat(buf, "|VTEXT");
1893 if (vp->v_flag & VSYSTEM)
1894 strcat(buf, "|VSYSTEM");
1895 if (vp->v_flag & VNOFLUSH)
1896 strcat(buf, "|VNOFLUSH");
1897 if (vp->v_flag & VXLOCK)
1898 strcat(buf, "|VXLOCK");
1899 if (vp->v_flag & VXWANT)
1900 strcat(buf, "|VXWANT");
1901 if (vp->v_flag & VBWAIT)
1902 strcat(buf, "|VBWAIT");
1903 if (vp->v_flag & VALIASED)
1904 strcat(buf, "|VALIASED");
1905 if (buf[0] != '\0')
1906 printf(" flags (%s)", &buf[1]);
1907 if (vp->v_data == NULL) {
1908 printf("\n");
1909 } else {
1910 printf("\n\t");
1911 VOP_PRINT(vp);
1912 }
1913 }
1914
1915 #ifdef DEBUG
1916 /*
1917 * List all of the locked vnodes in the system.
1918 * Called when debugging the kernel.
1919 */
1920 void
1921 printlockedvnodes()
1922 {
1923 struct proc *p = current_proc();
1924 struct mount *mp, *nmp;
1925 struct vnode *vp;
1926
1927 printf("Locked vnodes\n");
1928 simple_lock(&mountlist_slock);
1929 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1930 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1931 nmp = mp->mnt_list.cqe_next;
1932 continue;
1933 }
1934 for (vp = mp->mnt_vnodelist.lh_first;
1935 vp != NULL;
1936 vp = vp->v_mntvnodes.le_next) {
1937 if (VOP_ISLOCKED(vp))
1938 vprint((char *)0, vp);
1939 }
1940 simple_lock(&mountlist_slock);
1941 nmp = mp->mnt_list.cqe_next;
1942 vfs_unbusy(mp, p);
1943 }
1944 simple_unlock(&mountlist_slock);
1945 }
1946 #endif
1947
1948 /*
1949 * Top level filesystem related information gathering.
1950 */
1951 int
1952 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1953 int *name;
1954 u_int namelen;
1955 void *oldp;
1956 size_t *oldlenp;
1957 void *newp;
1958 size_t newlen;
1959 struct proc *p;
1960 {
1961 struct vfsconf *vfsp;
1962
1963 /*
1964 * The VFS_NUMMNTOPS shouldn't be at name[0] since
1965 * it is a VFS generic variable. So now we must check
1966 * namelen so we don't end up covering any UFS
1967 * variables (since UFS vfc_typenum is 1).
1968 *
1969 * It should have been:
1970 * name[0]: VFS_GENERIC
1971 * name[1]: VFS_NUMMNTOPS
1972 */
1973 if (namelen == 1 && name[0] == VFS_NUMMNTOPS) {
1974 extern unsigned int vfs_nummntops;
1975 return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
1976 }
1977
1978 /* all sysctl names at this level are at least name and field */
1979 if (namelen < 2)
1980 return (ENOTDIR); /* overloaded */
1981 if (name[0] != VFS_GENERIC) {
1982 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1983 if (vfsp->vfc_typenum == name[0])
1984 break;
1985 if (vfsp == NULL)
1986 return (EOPNOTSUPP);
1987 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1988 oldp, oldlenp, newp, newlen, p));
1989 }
1990 switch (name[1]) {
1991 case VFS_MAXTYPENUM:
1992 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
1993 case VFS_CONF:
1994 if (namelen < 3)
1995 return (ENOTDIR); /* overloaded */
1996 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1997 if (vfsp->vfc_typenum == name[2])
1998 break;
1999 if (vfsp == NULL)
2000 return (EOPNOTSUPP);
2001 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
2002 sizeof(struct vfsconf)));
2003 }
2004 return (EOPNOTSUPP);
2005 }
2006
2007 int kinfo_vdebug = 1;
2008 #define KINFO_VNODESLOP 10
2009 /*
2010 * Dump vnode list (via sysctl).
2011 * Copyout address of vnode followed by vnode.
2012 */
2013 /* ARGSUSED */
2014 int
2015 sysctl_vnode(where, sizep, p)
2016 char *where;
2017 size_t *sizep;
2018 struct proc *p;
2019 {
2020 struct mount *mp, *nmp;
2021 struct vnode *nvp, *vp;
2022 char *bp = where, *savebp;
2023 char *ewhere;
2024 int error;
2025
2026 #define VPTRSZ sizeof (struct vnode *)
2027 #define VNODESZ sizeof (struct vnode)
2028 if (where == NULL) {
2029 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2030 return (0);
2031 }
2032 ewhere = where + *sizep;
2033
2034 simple_lock(&mountlist_slock);
2035 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2036 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2037 nmp = mp->mnt_list.cqe_next;
2038 continue;
2039 }
2040 savebp = bp;
2041 again:
2042 simple_lock(&mntvnode_slock);
2043 for (vp = mp->mnt_vnodelist.lh_first;
2044 vp != NULL;
2045 vp = nvp) {
2046 /*
2047 * Check that the vp is still associated with
2048 * this filesystem. RACE: could have been
2049 * recycled onto the same filesystem.
2050 */
2051 if (vp->v_mount != mp) {
2052 simple_unlock(&mntvnode_slock);
2053 if (kinfo_vdebug)
2054 printf("kinfo: vp changed\n");
2055 bp = savebp;
2056 goto again;
2057 }
2058 nvp = vp->v_mntvnodes.le_next;
2059 if (bp + VPTRSZ + VNODESZ > ewhere) {
2060 simple_unlock(&mntvnode_slock);
2061 *sizep = bp - where;
2062 return (ENOMEM);
2063 }
2064 simple_unlock(&mntvnode_slock);
2065 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2066 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2067 return (error);
2068 bp += VPTRSZ + VNODESZ;
2069 simple_lock(&mntvnode_slock);
2070 }
2071 simple_unlock(&mntvnode_slock);
2072 simple_lock(&mountlist_slock);
2073 nmp = mp->mnt_list.cqe_next;
2074 vfs_unbusy(mp, p);
2075 }
2076 simple_unlock(&mountlist_slock);
2077
2078 *sizep = bp - where;
2079 return (0);
2080 }
2081
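/*
 * Illustrative sketch (disabled): the two-pass convention supported
 * above.  A NULL "where" yields a slop-padded size estimate; a second
 * call fills the buffer with (vnode pointer, vnode) pairs.  "buf" and
 * "needed" are assumed locals for the example.
 */
#if 0
	size_t needed;
	char *buf;

	(void) sysctl_vnode((char *)NULL, &needed, p);	/* sizing pass */
	MALLOC(buf, char *, needed, M_TEMP, M_WAITOK);
	error = sysctl_vnode(buf, &needed, p);		/* fill pass */
	/* on success, "needed" bytes of buf hold the dumped vnodes */
	_FREE(buf, M_TEMP);
#endif /* 0 */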
2082 /*
2083 * Check to see if a filesystem is mounted on a block device.
2084 */
2085 int
2086 vfs_mountedon(vp)
2087 struct vnode *vp;
2088 {
2089 struct vnode *vq;
2090 int error = 0;
2091
2092 if (vp->v_specflags & SI_MOUNTEDON)
2093 return (EBUSY);
2094 if (vp->v_flag & VALIASED) {
2095 simple_lock(&spechash_slock);
2096 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2097 if (vq->v_rdev != vp->v_rdev ||
2098 vq->v_type != vp->v_type)
2099 continue;
2100 if (vq->v_specflags & SI_MOUNTEDON) {
2101 error = EBUSY;
2102 break;
2103 }
2104 }
2105 simple_unlock(&spechash_slock);
2106 }
2107 return (error);
2108 }
2109
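/*
 * Illustrative sketch (disabled): mount-time use of vfs_mountedon() to
 * refuse a block device that already hosts a mounted filesystem (or is
 * aliased to one).
 */
#if 0
	if ((error = vfs_mountedon(devvp)))
		return (error);		/* EBUSY from the check above */
#endif /* 0 */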
2110 /*
2111 * Unmount all filesystems. The list is traversed in reverse order
2112 * of mounting to avoid dependencies.
2113 */
2114 __private_extern__ void
2115 vfs_unmountall()
2116 {
2117 struct mount *mp, *nmp;
2118 struct proc *p = current_proc();
2119
2120 /*
2121 * Since this only runs when rebooting, it is not interlocked.
2122 */
2123 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2124 nmp = mp->mnt_list.cqe_prev;
2125 (void) dounmount(mp, MNT_FORCE, p);
2126 }
2127 }
2128
2129 /*
2130 * Build hash lists of net addresses and hang them off the mount point.
2131 * Called by vfs_export() to set up the lists of export addresses.
2132 */
2133 static int
2134 vfs_hang_addrlist(mp, nep, argp)
2135 struct mount *mp;
2136 struct netexport *nep;
2137 struct export_args *argp;
2138 {
2139 register struct netcred *np;
2140 register struct radix_node_head *rnh;
2141 register int i;
2142 struct radix_node *rn;
2143 struct sockaddr *saddr, *smask = 0;
2144 struct domain *dom;
2145 int error;
2146
2147 if (argp->ex_addrlen == 0) {
2148 if (mp->mnt_flag & MNT_DEFEXPORTED)
2149 return (EPERM);
2150 np = &nep->ne_defexported;
2151 np->netc_exflags = argp->ex_flags;
2152 np->netc_anon = argp->ex_anon;
2153 np->netc_anon.cr_ref = 1;
2154 mp->mnt_flag |= MNT_DEFEXPORTED;
2155 return (0);
2156 }
2157 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2158 MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK);
2159 bzero((caddr_t)np, i);
2160 saddr = (struct sockaddr *)(np + 1);
2161 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
2162 goto out;
2163 if (saddr->sa_len > argp->ex_addrlen)
2164 saddr->sa_len = argp->ex_addrlen;
2165 if (argp->ex_masklen) {
2166 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
 2167 error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
2168 if (error)
2169 goto out;
2170 if (smask->sa_len > argp->ex_masklen)
2171 smask->sa_len = argp->ex_masklen;
2172 }
2173 i = saddr->sa_family;
2174 if ((rnh = nep->ne_rtable[i]) == 0) {
2175 /*
 2176 * Seems silly to initialize every AF when most are not
 2177 * used; do so on demand here.
2178 */
2179 for (dom = domains; dom; dom = dom->dom_next)
2180 if (dom->dom_family == i && dom->dom_rtattach) {
2181 dom->dom_rtattach((void **)&nep->ne_rtable[i],
2182 dom->dom_rtoffset);
2183 break;
2184 }
2185 if ((rnh = nep->ne_rtable[i]) == 0) {
2186 error = ENOBUFS;
2187 goto out;
2188 }
2189 }
2190 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
2191 np->netc_rnodes);
2192 if (rn == 0) {
2193 /*
2194 * One of the reasons that rnh_addaddr may fail is that
2195 * the entry already exists. To check for this case, we
2196 * look up the entry to see if it is there. If so, we
2197 * do not need to make a new entry but do return success.
2198 */
2199 _FREE(np, M_NETADDR);
2200 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
2201 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
2202 ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
2203 !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
2204 (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
2205 return (0);
2206 return (EPERM);
2207 }
2208 np->netc_exflags = argp->ex_flags;
2209 np->netc_anon = argp->ex_anon;
2210 np->netc_anon.cr_ref = 1;
2211 return (0);
2212 out:
2213 _FREE(np, M_NETADDR);
2214 return (error);
2215 }
2216
2217 /* ARGSUSED */
2218 static int
2219 vfs_free_netcred(rn, w)
2220 struct radix_node *rn;
2221 caddr_t w;
2222 {
2223 register struct radix_node_head *rnh = (struct radix_node_head *)w;
2224
2225 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2226 _FREE((caddr_t)rn, M_NETADDR);
2227 return (0);
2228 }
2229
2230 /*
2231 * Free the net address hash lists that are hanging off the mount points.
2232 */
2233 static void
2234 vfs_free_addrlist(nep)
2235 struct netexport *nep;
2236 {
2237 register int i;
2238 register struct radix_node_head *rnh;
2239
2240 for (i = 0; i <= AF_MAX; i++)
2241 if (rnh = nep->ne_rtable[i]) {
2242 (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
2243 (caddr_t)rnh);
2244 _FREE((caddr_t)rnh, M_RTABLE);
2245 nep->ne_rtable[i] = 0;
2246 }
2247 }
2248
2249 int
2250 vfs_export(mp, nep, argp)
2251 struct mount *mp;
2252 struct netexport *nep;
2253 struct export_args *argp;
2254 {
2255 int error;
2256
2257 if (argp->ex_flags & MNT_DELEXPORT) {
2258 vfs_free_addrlist(nep);
2259 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2260 }
2261 if (argp->ex_flags & MNT_EXPORTED) {
2262 if (error = vfs_hang_addrlist(mp, nep, argp))
2263 return (error);
2264 mp->mnt_flag |= MNT_EXPORTED;
2265 }
2266 return (0);
2267 }
2268
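/*
 * Illustrative sketch (disabled): how a filesystem's mount/update path
 * might feed its export_args through vfs_export().  "fs_mp" and
 * "fs_args" are assumed names for the example; the per-mount netexport
 * normally lives in the filesystem's private mount data.
 */
#if 0
	if (fs_args.export.ex_flags & (MNT_EXPORTED | MNT_DELEXPORT)) {
		error = vfs_export(mp, &fs_mp->fs_export, &fs_args.export);
		if (error)
			return (error);
	}
#endif /* 0 */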
2269 struct netcred *
2270 vfs_export_lookup(mp, nep, nam)
2271 register struct mount *mp;
2272 struct netexport *nep;
2273 struct mbuf *nam;
2274 {
2275 register struct netcred *np;
2276 register struct radix_node_head *rnh;
2277 struct sockaddr *saddr;
2278
2279 np = NULL;
2280 if (mp->mnt_flag & MNT_EXPORTED) {
2281 /*
2282 * Lookup in the export list first.
2283 */
2284 if (nam != NULL) {
2285 saddr = mtod(nam, struct sockaddr *);
2286 rnh = nep->ne_rtable[saddr->sa_family];
2287 if (rnh != NULL) {
2288 np = (struct netcred *)
2289 (*rnh->rnh_matchaddr)((caddr_t)saddr,
2290 rnh);
2291 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2292 np = NULL;
2293 }
2294 }
2295 /*
2296 * If no address match, use the default if it exists.
2297 */
2298 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2299 np = &nep->ne_defexported;
2300 }
2301 return (np);
2302 }
2303
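/*
 * Illustrative sketch (disabled): an NFS-server style access check
 * built on vfs_export_lookup().  "fs_mp" is an assumed pointer to the
 * filesystem's private mount data and "nam" an mbuf holding the
 * client's sockaddr, as in the lookup above.
 */
#if 0
	struct netcred *nc;

	nc = vfs_export_lookup(mp, &fs_mp->fs_export, nam);
	if (nc == NULL)
		return (EACCES);	/* host is not in the export lists */
	/* nc->netc_exflags and nc->netc_anon then govern the request */
#endif /* 0 */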
2304 /*
2305 * try to reclaim vnodes from the memory
2306 * object cache
2307 */
2308 static int
2309 vm_object_cache_reclaim(int count)
2310 {
2311 int cnt;
2312 void vnode_pager_release_from_cache(int *);
2313
2314 /* attempt to reclaim vnodes from VM object cache */
2315 cnt = count;
2316 vnode_pager_release_from_cache(&cnt);
2317 return(cnt);
2318 }
2319
2320 /*
2321 * Release memory object reference held by inactive vnodes
2322 * and then try to reclaim some vnodes from the memory
2323 * object cache
2324 */
2325 static int
2326 vnreclaim(int count)
2327 {
2328 int i, loopcnt;
2329 struct vnode *vp;
2330 int err;
2331 struct proc *p;
2332
2333 i = 0;
2334 loopcnt = 0;
2335
2336 /* Try to release "count" vnodes from the inactive list */
2337 restart:
2338 if (++loopcnt > inactivevnodes) {
2339 /*
2340 * I did my best trying to reclaim the vnodes.
2341 * Do not try any more as that would only lead to
2342 * long latencies. Also in the worst case
2343 * this can get totally CPU bound.
 2344 * Just fall through and attempt a reclaim of VM
2345 * object cache
2346 */
2347 goto out;
2348 }
2349
2350 simple_lock(&vnode_free_list_slock);
2351 for (vp = TAILQ_FIRST(&vnode_inactive_list);
2352 (vp != NULLVP) && (i < count);
2353 vp = TAILQ_NEXT(vp, v_freelist)) {
2354
2355 if (!simple_lock_try(&vp->v_interlock))
2356 continue;
2357
2358 if (vp->v_usecount != 1)
2359 panic("vnreclaim: v_usecount");
2360
2361 if(!UBCINFOEXISTS(vp)) {
2362 if (vp->v_type == VBAD) {
2363 VREMINACTIVE("vnreclaim", vp);
2364 simple_unlock(&vp->v_interlock);
2365 continue;
2366 } else
2367 panic("non UBC vnode on inactive list");
2368 /* Should not reach here */
2369 }
2370
2371 /* If vnode is already being reclaimed, wait */
2372 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
2373 vp->v_flag |= VXWANT;
2374 simple_unlock(&vp->v_interlock);
2375 simple_unlock(&vnode_free_list_slock);
2376 (void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
2377 goto restart;
2378 }
2379
2380 VREMINACTIVE("vnreclaim", vp);
2381 simple_unlock(&vnode_free_list_slock);
2382
2383 if (ubc_issetflags(vp, UI_WASMAPPED)) {
2384 /*
2385 * We should not reclaim as it is likely
2386 * to be in use. Let it die a natural death.
2387 * Release the UBC reference if one exists
2388 * and put it back at the tail.
2389 */
2390 simple_unlock(&vp->v_interlock);
2391 if (ubc_release_named(vp)) {
2392 if (UBCINFOEXISTS(vp)) {
2393 simple_lock(&vp->v_interlock);
2394 if (vp->v_usecount == 1 && !VONLIST(vp))
2395 vinactive(vp);
2396 simple_unlock(&vp->v_interlock);
2397 }
2398 } else {
2399 simple_lock(&vp->v_interlock);
2400 vinactive(vp);
2401 simple_unlock(&vp->v_interlock);
2402 }
2403 } else {
2404 int didhold;
2405
2406 VORECLAIM_ENABLE(vp);
2407
2408 /*
2409 * scrub the dirty pages and invalidate the buffers
2410 */
2411 p = current_proc();
2412 err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
2413 if (err) {
2414 /* cannot reclaim */
2415 simple_lock(&vp->v_interlock);
2416 vinactive(vp);
2417 VORECLAIM_DISABLE(vp);
2418 i++;
2419 simple_unlock(&vp->v_interlock);
2420 goto restart;
2421 }
2422
2423 /* keep the vnode alive so we can kill it */
2424 simple_lock(&vp->v_interlock);
2425 if(vp->v_usecount != 1)
2426 panic("VOCR: usecount race");
2427 vp->v_usecount++;
2428 simple_unlock(&vp->v_interlock);
2429
2430 /* clean up the state in VM without invalidating */
2431 didhold = ubc_hold(vp);
2432 if (didhold)
2433 (void)ubc_clean(vp, 0);
2434
2435 /* flush and invalidate buffers associated with the vnode */
2436 if (vp->v_tag == VT_NFS)
2437 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
2438 else
2439 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
2440
2441 /*
2442 * Note: for the v_usecount == 2 case, VOP_INACTIVE
2443 * has not yet been called. Call it now while vp is
2444 * still locked, it will also release the lock.
2445 */
2446 if (vp->v_usecount == 2)
2447 VOP_INACTIVE(vp, p);
2448 else
2449 VOP_UNLOCK(vp, 0, p);
2450
2451 if (didhold)
2452 ubc_rele(vp);
2453
2454 /*
2455 * destroy the ubc named reference.
2456 * If we can't because it is held for I/Os
2457 * in progress, just put it back on the inactive
2458 * list and move on. Otherwise, the paging reference
2459 * is toast (and so is this vnode?).
2460 */
2461 if (ubc_destroy_named(vp)) {
2462 i++;
2463 }
2464 simple_lock(&vp->v_interlock);
2465 VORECLAIM_DISABLE(vp);
2466 simple_unlock(&vp->v_interlock);
2467 vrele(vp); /* release extra use we added here */
2468 }
2469 /* inactive list lock was released, must restart */
2470 goto restart;
2471 }
2472 simple_unlock(&vnode_free_list_slock);
2473
2474 vnode_reclaim_tried += i;
2475 out:
2476 i = vm_object_cache_reclaim(count);
2477 vnode_objects_reclaimed += i;
2478
2479 return(i);
2480 }
2481
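/*
 * Illustrative sketch (disabled): a vnode-shortage path might ask
 * vnreclaim() to recover a batch of inactive vnodes before giving up.
 * "vnodetarget" is an assumed tuning value for the example.
 */
#if 0
	if (TAILQ_EMPTY(&vnode_free_list))
		(void) vnreclaim(vnodetarget);
#endif /* 0 */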
2482 /*
2483 * This routine is called from vnode_pager_no_senders()
2484 * which in turn can be called with vnode locked by vnode_uncache()
2485 * But it could also get called as a result of vm_object_cache_trim().
2486 * In that case lock state is unknown.
2487 * AGE the vnode so that it gets recycled quickly.
2488 * Check lock status to decide whether to call vput() or vrele().
2489 */
2490 __private_extern__ void
2491 vnode_pager_vrele(struct vnode *vp)
2492 {
2493
2494 boolean_t funnel_state;
2495 int isvnreclaim = 1;
2496
2497 if (vp == (struct vnode *) NULL)
2498 panic("vnode_pager_vrele: null vp");
2499
2500 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2501
2502 /* Mark the vnode to be recycled */
2503 vagevp(vp);
2504
2505 simple_lock(&vp->v_interlock);
2506 /*
2507 * If a vgone (or vclean) is already in progress,
 2508 * do not bother with the ubc_info cleanup.
2509 * Let the vclean deal with it.
2510 */
2511 if (vp->v_flag & VXLOCK) {
2512 CLR(vp->v_flag, VTERMINATE);
2513 if (ISSET(vp->v_flag, VTERMWANT)) {
2514 CLR(vp->v_flag, VTERMWANT);
2515 wakeup((caddr_t)&vp->v_ubcinfo);
2516 }
2517 simple_unlock(&vp->v_interlock);
2518 vrele(vp);
2519 (void) thread_funnel_set(kernel_flock, funnel_state);
2520 return;
2521 }
2522
2523 /* It's dead, Jim! */
2524 if (!ISSET(vp->v_flag, VORECLAIM)) {
2525 /*
2526 * called as a result of eviction of the memory
2527 * object from the memory object cache
2528 */
2529 isvnreclaim = 0;
2530
2531 /* So serialize vnode operations */
2532 VORECLAIM_ENABLE(vp);
2533 }
2534 if (!ISSET(vp->v_flag, VTERMINATE))
2535 SET(vp->v_flag, VTERMINATE);
2536 if (UBCINFOEXISTS(vp)) {
2537 struct ubc_info *uip = vp->v_ubcinfo;
2538
2539 if (ubc_issetflags(vp, UI_WASMAPPED))
2540 SET(vp->v_flag, VWASMAPPED);
2541
2542 vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
2543 simple_unlock(&vp->v_interlock);
2544 ubc_info_deallocate(uip);
2545 } else {
2546 if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
2547 && ((vp)->v_ubcinfo != UBC_NOINFO)) {
2548 struct ubc_info *uip = vp->v_ubcinfo;
2549
2550 vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
2551 simple_unlock(&vp->v_interlock);
2552 ubc_info_deallocate(uip);
2553 } else {
2554 simple_unlock(&vp->v_interlock);
2555 }
2556 }
2557
2558 CLR(vp->v_flag, VTERMINATE);
2559
2560 if (vp->v_type != VBAD){
2561 vgone(vp); /* revoke the vnode */
2562 vrele(vp); /* and drop the reference */
2563 } else
2564 vrele(vp);
2565
2566 if (ISSET(vp->v_flag, VTERMWANT)) {
2567 CLR(vp->v_flag, VTERMWANT);
2568 wakeup((caddr_t)&vp->v_ubcinfo);
2569 }
2570 if (!isvnreclaim)
2571 VORECLAIM_DISABLE(vp);
2572 (void) thread_funnel_set(kernel_flock, funnel_state);
2573 return;
2574 }
2575
2576
2577 #if DIAGNOSTIC
2578 int walk_vnodes_debug=0;
2579
2580 void
2581 walk_allvnodes()
2582 {
2583 struct mount *mp, *nmp;
2584 struct vnode *vp;
2585 int cnt = 0;
2586
2587 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2588 for (vp = mp->mnt_vnodelist.lh_first;
2589 vp != NULL;
2590 vp = vp->v_mntvnodes.le_next) {
2591 if (vp->v_usecount < 0){
2592 if(walk_vnodes_debug) {
2593 printf("vp is %x\n",vp);
2594 }
2595 }
2596 }
2597 nmp = mp->mnt_list.cqe_next;
2598 }
2599 for (cnt = 0, vp = vnode_free_list.tqh_first;
2600 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
 2601 		if ((vp->v_usecount < 0) && walk_vnodes_debug) {
 2602 			printf("vp is %x\n",vp);
 2603 		}
2606 }
2607 printf("%d - free\n", cnt);
2608
2609 for (cnt = 0, vp = vnode_inactive_list.tqh_first;
2610 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
 2611 		if ((vp->v_usecount < 0) && walk_vnodes_debug) {
 2612 			printf("vp is %x\n",vp);
 2613 		}
2616 }
2617 printf("%d - inactive\n", cnt);
2618 }
2619 #endif /* DIAGNOSTIC */
2620
2621 void
2622 vfs_io_attributes(vp, flags, iosize, vectors)
2623 struct vnode *vp;
2624 int flags; /* B_READ or B_WRITE */
2625 int *iosize;
2626 int *vectors;
2627 {
2628 struct mount *mp;
2629
2630 /* start with "reasonable" defaults */
2631 *iosize = MAXPHYS;
2632 *vectors = 32;
2633
2634 mp = vp->v_mount;
2635 if (mp != NULL) {
2636 switch (flags) {
2637 case B_READ:
2638 *iosize = mp->mnt_maxreadcnt;
2639 *vectors = mp->mnt_segreadcnt;
2640 break;
2641 case B_WRITE:
2642 *iosize = mp->mnt_maxwritecnt;
2643 *vectors = mp->mnt_segwritecnt;
2644 break;
2645 default:
2646 break;
2647 }
2648 }
2649
2650 return;
2651 }
2652
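/*
 * Illustrative sketch (disabled): cluster I/O code clamping a transfer
 * to the limits reported above.  "io_size" is an assumed local.
 */
#if 0
	int max_iosize, max_vectors;

	vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors);
	if (io_size > max_iosize)
		io_size = max_iosize;
#endif /* 0 */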
2653 #include <dev/disk.h>
2654
2655 int
2656 vfs_init_io_attributes(devvp, mp)
2657 struct vnode *devvp;
2658 struct mount *mp;
2659 {
2660 int error;
2661 off_t readblockcnt;
2662 off_t writeblockcnt;
2663 off_t readsegcnt;
2664 off_t writesegcnt;
2665 u_long blksize;
2666
2667 u_int64_t temp;
2668
2669 struct proc *p = current_proc();
2670 struct ucred *cred = p->p_ucred;
2671
2672 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
2673 (caddr_t)&readblockcnt, 0, cred, p)))
2674 return (error);
2675
2676 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
2677 (caddr_t)&writeblockcnt, 0, cred, p)))
2678 return (error);
2679
2680 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
2681 (caddr_t)&readsegcnt, 0, cred, p)))
2682 return (error);
2683
2684 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
2685 (caddr_t)&writesegcnt, 0, cred, p)))
2686 return (error);
2687
2688 if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
2689 (caddr_t)&blksize, 0, cred, p)))
2690 return (error);
2691
2692 temp = readblockcnt * blksize;
2693 temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2694 mp->mnt_maxreadcnt = (u_int32_t)temp;
2695
2696 temp = writeblockcnt * blksize;
2697 temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2698 mp->mnt_maxwritecnt = (u_int32_t)temp;
2699
2700 temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
2701 mp->mnt_segreadcnt = (u_int16_t)temp;
2702
2703 temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
2704 mp->mnt_segwritecnt = (u_int16_t)temp;
2705
2706 #if 0
2707 printf("--- IO attributes for mount point 0x%08x ---\n", mp);
2708 printf("\tmnt_maxreadcnt = 0x%x", mp->mnt_maxreadcnt);
2709 printf("\tmnt_maxwritecnt = 0x%x\n", mp->mnt_maxwritecnt);
2710 printf("\tmnt_segreadcnt = 0x%x", mp->mnt_segreadcnt);
2711 printf("\tmnt_segwritecnt = 0x%x\n", mp->mnt_segwritecnt);
2712 #endif /* 0 */
2713
2714 return (error);
2715 }
2716
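/*
 * Illustrative sketch (disabled): a filesystem's mount routine would
 * call vfs_init_io_attributes() once on the device vnode so that the
 * limits returned by vfs_io_attributes() reflect the driver's DKIOC
 * values.  The error handling shown is only an example.
 */
#if 0
	if ((error = vfs_init_io_attributes(devvp, mp)))
		return (error);
#endif /* 0 */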