1c79356b 1/*
0b4e3aa0 2 * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23/*
24 * Copyright (c) 1989, 1993
25 * The Regents of the University of California. All rights reserved.
26 * (c) UNIX System Laboratories, Inc.
27 * All or some portions of this file are derived from material licensed
28 * to the University of California by American Telephone and Telegraph
29 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
30 * the permission of UNIX System Laboratories, Inc.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
61 */
62
63/*
64 * External virtual filesystem routines
65 */
66
67#define DIAGNOSTIC 1
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/proc.h>
72#include <sys/mount.h>
73#include <sys/time.h>
74#include <sys/vnode.h>
75#include <sys/stat.h>
76#include <sys/namei.h>
77#include <sys/ucred.h>
78#include <sys/buf.h>
79#include <sys/errno.h>
80#include <sys/malloc.h>
81#include <sys/domain.h>
82#include <sys/mbuf.h>
83#include <sys/syslog.h>
84#include <sys/ubc.h>
85#include <sys/vm.h>
86#include <sys/sysctl.h>
87
88#include <kern/assert.h>
89
90#include <miscfs/specfs/specdev.h>
91
92#include <mach/mach_types.h>
93#include <mach/memory_object_types.h>
94
95
96enum vtype iftovt_tab[16] = {
97 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
98 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
99};
100int vttoif_tab[9] = {
101 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
102 S_IFSOCK, S_IFIFO, S_IFMT,
103};
104
105static void vfree(struct vnode *vp);
106static void vinactive(struct vnode *vp);
0b4e3aa0 107static int vnreclaim(int count);
108extern kern_return_t
109 adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
110
111/*
112 * Insq/Remq for the vnode usage lists.
113 */
114#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
115#define bufremvn(bp) { \
116 LIST_REMOVE(bp, b_vnbufs); \
117 (bp)->b_vnbufs.le_next = NOLIST; \
118}
119
120TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
121TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */
122struct mntlist mountlist; /* mounted filesystem list */
123
124#if DIAGNOSTIC
125#define VLISTCHECK(fun, vp, list) \
126 if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
127 panic("%s: %s vnode not on %slist", (fun), (list), (list));
128
129#define VINACTIVECHECK(fun, vp, expected) \
130 do { \
131 int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE); \
132 if (__is_inactive ^ expected) \
133 panic("%s: %sinactive vnode, expected %s", (fun), \
134 __is_inactive? "" : "not ", \
135 expected? "inactive": "not inactive"); \
136 } while(0)
137#else
138#define VLISTCHECK(fun, vp, list)
139#define VINACTIVECHECK(fun, vp, expected)
140#endif /* DIAGNOSTIC */
141
142#define VLISTNONE(vp) \
143 do { \
144 (vp)->v_freelist.tqe_next = (struct vnode *)0; \
145 (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb; \
146 } while(0)
147
148#define VONLIST(vp) \
149 ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
150
151/* remove a vnode from free vnode list */
152#define VREMFREE(fun, vp) \
153 do { \
154 VLISTCHECK((fun), (vp), "free"); \
155 TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist); \
156 VLISTNONE((vp)); \
157 freevnodes--; \
158 } while(0)
159
160/* remove a vnode from inactive vnode list */
161#define VREMINACTIVE(fun, vp) \
162 do { \
163 VLISTCHECK((fun), (vp), "inactive"); \
164 VINACTIVECHECK((fun), (vp), VUINACTIVE); \
165 TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \
166 CLR((vp)->v_flag, VUINACTIVE); \
167 VLISTNONE((vp)); \
168 inactivevnodes--; \
169 } while(0)
170
171#define VORECLAIM_ENABLE(vp) \
172 do { \
173 if (ISSET((vp)->v_flag, VORECLAIM)) \
174 panic("vm object raclaim already"); \
175 SET((vp)->v_flag, VORECLAIM); \
176 } while(0)
177
178#define VORECLAIM_DISABLE(vp) \
179 do { \
180 CLR((vp)->v_flag, VORECLAIM); \
181 if (ISSET((vp)->v_flag, VXWANT)) { \
182 CLR((vp)->v_flag, VXWANT); \
183 wakeup((caddr_t)(vp)); \
184 } \
185 } while(0)
186
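/*
 * Editorial sketch, not part of the original source: the list macros
 * above are meant to be used with vnode_free_list_slock held across both
 * the membership test and the removal, as getnewvnode() and vget() do
 * later in this file.
 */
#if 0	/* illustrative only */
	simple_lock(&vnode_free_list_slock);
	if (VONLIST(vp))
		VREMFREE("example", vp);	/* take vp off the free list */
	simple_unlock(&vnode_free_list_slock);
#endif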
187/*
 188 * Have to declare the first two locks as actual data even if !MACH_SLOCKS,
 189 * since pointers to them get passed around.
190 */
191simple_lock_data_t mountlist_slock;
192simple_lock_data_t mntvnode_slock;
193decl_simple_lock_data(,mntid_slock);
194decl_simple_lock_data(,vnode_free_list_slock);
195decl_simple_lock_data(,spechash_slock);
196
197/*
 198 * vnodetarget is the number of vnodes we expect to get back
 199 * from the inactive vnode list and VM object cache.
 200 * Since vnreclaim() is a mainly cpu-bound operation, faster
 201 * processors could use a higher number.
202 * Having this number too high introduces longer delays in
203 * the execution of getnewvnode().
204 */
205unsigned long vnodetarget; /* target for vnreclaim() */
206#define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */
207
208/*
209 * We need quite a few vnodes on the free list to sustain the
 210 * rapid stat() traffic a compilation makes, and still benefit from the name
211 * cache. Having too few vnodes on the free list causes serious disk
212 * thrashing as we cycle through them.
213 */
0b4e3aa0 214#define VNODE_FREE_MIN 300 /* freelist should have at least this many */
215
216/*
217 * We need to get vnodes back from the VM object cache when a certain #
218 * of vnodes are reused from the freelist. This is essential for the
219 * caching to be effective in the namecache and the buffer cache [for the
220 * metadata].
221 */
222#define VNODE_TOOMANY_REUSED (VNODE_FREE_MIN/4)
223
224/*
225 * If we have enough vnodes on the freelist we do not want to reclaim
226 * the vnodes from the VM object cache.
227 */
228#define VNODE_FREE_ENOUGH (VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
229
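/*
 * Editorial note, not part of the original source: with the defaults
 * above, VNODE_FREE_MIN = 300, VNODE_TOOMANY_REUSED = 300/4 = 75 and
 * VNODE_FREE_ENOUGH = 300 + 300/2 = 450.  Once getnewvnode() is reusing
 * vnodes rather than allocating new ones, it calls vnreclaim(vnodetarget)
 * (20 vnodes by default) whenever the free list drops below 300 entries,
 * and again after every 75 reuses if the free list has not recovered to
 * at least 450 entries.
 */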
230/*
231 * Initialize the vnode management data structures.
232 */
0b4e3aa0 233__private_extern__ void
234vntblinit()
235{
236 extern struct lock__bsd__ exchangelock;
237
238 simple_lock_init(&mountlist_slock);
239 simple_lock_init(&mntvnode_slock);
240 simple_lock_init(&mntid_slock);
241 simple_lock_init(&spechash_slock);
242 TAILQ_INIT(&vnode_free_list);
243 simple_lock_init(&vnode_free_list_slock);
244 TAILQ_INIT(&vnode_inactive_list);
245 CIRCLEQ_INIT(&mountlist);
246 lockinit(&exchangelock, PVFS, "exchange", 0, 0);
247
248 if (!vnodetarget)
249 vnodetarget = VNODE_FREE_TARGET;
250
251 /*
 252 * Scale the vm_object_cache to accommodate the vnodes
 253 * we want to cache.
254 */
255 (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
256}
257
258/* Reset the VM Object Cache with the values passed in */
0b4e3aa0 259__private_extern__ kern_return_t
260reset_vmobjectcache(unsigned int val1, unsigned int val2)
261{
262 vm_size_t oval = val1 - VNODE_FREE_MIN;
263 vm_size_t nval = val2 - VNODE_FREE_MIN;
264
265 return(adjust_vm_object_cache(oval, nval));
266}
267
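/*
 * Editorial example, not part of the original source (the caller shown
 * is an assumption): growing the desired vnode count from 1000 to 2000
 * would map to VM object cache sizes of 1000 - 300 = 700 and
 * 2000 - 300 = 1700, mirroring the scaling done in vntblinit() above.
 */
#if 0	/* illustrative only */
	kret = reset_vmobjectcache(1000, 2000);
#endif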
268/*
269 * Mark a mount point as busy. Used to synchronize access and to delay
270 * unmounting. Interlock is not released on failure.
271 */
272int
273vfs_busy(mp, flags, interlkp, p)
274 struct mount *mp;
275 int flags;
276 struct slock *interlkp;
277 struct proc *p;
278{
279 int lkflags;
280
281 if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
282 if (flags & LK_NOWAIT)
283 return (ENOENT);
284 mp->mnt_kern_flag |= MNTK_MWAIT;
285 if (interlkp)
286 simple_unlock(interlkp);
287 /*
288 * Since all busy locks are shared except the exclusive
289 * lock granted when unmounting, the only place that a
290 * wakeup needs to be done is at the release of the
291 * exclusive lock at the end of dounmount.
292 */
293 sleep((caddr_t)mp, PVFS);
294 if (interlkp)
295 simple_lock(interlkp);
296 return (ENOENT);
297 }
298 lkflags = LK_SHARED;
299 if (interlkp)
300 lkflags |= LK_INTERLOCK;
301 if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
302 panic("vfs_busy: unexpected lock failure");
303 return (0);
304}
305
306/*
307 * Free a busy filesystem.
308 */
309void
310vfs_unbusy(mp, p)
311 struct mount *mp;
312 struct proc *p;
313{
314
315 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
316}
317
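/*
 * Editorial sketch, not part of the original source: the usual pattern
 * for walking the mount list, as used by printlockedvnodes() and
 * sysctl_vnode() later in this file -- busy each mount under
 * mountlist_slock, work on it with the list lock dropped, then unbusy it.
 */
#if 0	/* illustrative only */
	struct mount *mp, *nmp;
	struct proc *p = current_proc();

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		/* ... examine mp->mnt_vnodelist here ... */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
#endif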
318/*
319 * Lookup a filesystem type, and if found allocate and initialize
320 * a mount structure for it.
321 *
322 * Devname is usually updated by mount(8) after booting.
323 */
324int
325vfs_rootmountalloc(fstypename, devname, mpp)
326 char *fstypename;
327 char *devname;
328 struct mount **mpp;
329{
330 struct proc *p = current_proc(); /* XXX */
331 struct vfsconf *vfsp;
332 struct mount *mp;
333
334 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
335 if (!strcmp(vfsp->vfc_name, fstypename))
336 break;
337 if (vfsp == NULL)
338 return (ENODEV);
339 mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
340 bzero((char *)mp, (u_long)sizeof(struct mount));
341
342 /* Initialize the default IO constraints */
343 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
344 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
345
346 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
347 (void)vfs_busy(mp, LK_NOWAIT, 0, p);
348 LIST_INIT(&mp->mnt_vnodelist);
349 mp->mnt_vfc = vfsp;
350 mp->mnt_op = vfsp->vfc_vfsops;
351 mp->mnt_flag = MNT_RDONLY;
352 mp->mnt_vnodecovered = NULLVP;
353 vfsp->vfc_refcount++;
354 mp->mnt_stat.f_type = vfsp->vfc_typenum;
355 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
356 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
357 mp->mnt_stat.f_mntonname[0] = '/';
358 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
359 *mpp = mp;
360 return (0);
361}
362
363/*
364 * Find an appropriate filesystem to use for the root. If a filesystem
365 * has not been preselected, walk through the list of known filesystems
366 * trying those that have mountroot routines, and try them until one
367 * works or we have tried them all.
368 */
369int
370vfs_mountroot()
371{
372 struct vfsconf *vfsp;
373 extern int (*mountroot)(void);
374 int error;
375
376 if (mountroot != NULL) {
377 error = (*mountroot)();
378 return (error);
379 }
380
381 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
382 if (vfsp->vfc_mountroot == NULL)
383 continue;
384 if ((error = (*vfsp->vfc_mountroot)()) == 0)
385 return (0);
386 if (error != EINVAL)
387 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
388 }
389 return (ENODEV);
390}
391
392/*
393 * Lookup a mount point by filesystem identifier.
394 */
395struct mount *
396vfs_getvfs(fsid)
397 fsid_t *fsid;
398{
399 register struct mount *mp;
400
401 simple_lock(&mountlist_slock);
402 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
403 mp = mp->mnt_list.cqe_next) {
404 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
405 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
406 simple_unlock(&mountlist_slock);
407 return (mp);
408 }
409 }
410 simple_unlock(&mountlist_slock);
411 return ((struct mount *)0);
412}
413
414/*
415 * Get a new unique fsid
416 */
417void
418vfs_getnewfsid(mp)
419 struct mount *mp;
420{
421static u_short xxxfs_mntid;
422
423 fsid_t tfsid;
424 int mtype;
425
426 simple_lock(&mntid_slock);
427 mtype = mp->mnt_vfc->vfc_typenum;
428 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
429 mp->mnt_stat.f_fsid.val[1] = mtype;
430 if (xxxfs_mntid == 0)
431 ++xxxfs_mntid;
432 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
433 tfsid.val[1] = mtype;
434 if (mountlist.cqh_first != (void *)&mountlist) {
435 while (vfs_getvfs(&tfsid)) {
436 tfsid.val[0]++;
437 xxxfs_mntid++;
438 }
439 }
440 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
441 simple_unlock(&mntid_slock);
442}
443
444/*
445 * Set vnode attributes to VNOVAL
446 */
447void
448vattr_null(vap)
449 register struct vattr *vap;
450{
451
452 vap->va_type = VNON;
453 vap->va_size = vap->va_bytes = VNOVAL;
454 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
455 vap->va_fsid = vap->va_fileid =
456 vap->va_blocksize = vap->va_rdev =
457 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
458 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
459 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
460 vap->va_flags = vap->va_gen = VNOVAL;
461 vap->va_vaflags = 0;
462}
463
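/*
 * Editorial sketch, not part of the original source: callers normally
 * clear a struct vattr with vattr_null() and then set only the fields
 * they want changed, so everything still equal to VNOVAL is ignored,
 * e.g. before a VOP_SETATTR() that truncates a file.
 */
#if 0	/* illustrative only */
	struct vattr va;

	vattr_null(&va);
	va.va_size = 0;			/* truncate to zero length */
	error = VOP_SETATTR(vp, &va, cred, p);
#endif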
464/*
465 * Routines having to do with the management of the vnode table.
466 */
467extern int (**dead_vnodeop_p)(void *);
468static void vclean __P((struct vnode *vp, int flag, struct proc *p));
469extern void vgonel __P((struct vnode *vp, struct proc *p));
470long numvnodes, freevnodes;
471long inactivevnodes;
472long vnode_reclaim_tried;
473long vnode_objects_reclaimed;
474
475
476extern struct vattr va_null;
477
478/*
479 * Return the next vnode from the free list.
480 */
481int
482getnewvnode(tag, mp, vops, vpp)
483 enum vtagtype tag;
484 struct mount *mp;
485 int (**vops)(void *);
486 struct vnode **vpp;
487{
488 struct proc *p = current_proc(); /* XXX */
489 struct vnode *vp;
490 int cnt, didretry = 0;
491 static int reused = 0; /* track the reuse rate */
492 int reclaimhits = 0;
493
494retry:
495 simple_lock(&vnode_free_list_slock);
496 /*
 497 * MALLOC a vnode if the number of vnodes has not reached the desired
 498 * value and the number on the free list is still reasonable.
 499 * Otherwise reuse one from the freelist, even though we may evict a name
 500 * cache entry, to limit the number of vnodes that accumulate: vnodes tie up
 501 * wired memory and are never garbage collected.
502 */
503 if (numvnodes < desiredvnodes && (freevnodes < (2 * VNODE_FREE_MIN))) {
504 numvnodes++;
505 simple_unlock(&vnode_free_list_slock);
506 MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
507 bzero((char *)vp, sizeof *vp);
508 VLISTNONE(vp); /* avoid double queue removal */
509 simple_lock_init(&vp->v_interlock);
510 goto done;
511 }
512
513 /*
514 * Once the desired number of vnodes are allocated,
515 * we start reusing the vnodes.
516 */
517 if (freevnodes < VNODE_FREE_MIN) {
518 /*
519 * if we are low on vnodes on the freelist attempt to get
520 * some back from the inactive list and VM object cache
521 */
522 simple_unlock(&vnode_free_list_slock);
523 (void)vnreclaim(vnodetarget);
524 simple_lock(&vnode_free_list_slock);
525 }
526 if (numvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED) {
527 reused = 0;
528 if (freevnodes < VNODE_FREE_ENOUGH) {
529 simple_unlock(&vnode_free_list_slock);
530 (void)vnreclaim(vnodetarget);
531 simple_lock(&vnode_free_list_slock);
532 }
533 }
534
535 for (cnt = 0, vp = vnode_free_list.tqh_first;
536 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
537 if (simple_lock_try(&vp->v_interlock)) {
538 /* got the interlock */
539 if (ISSET(vp->v_flag, VORECLAIM)) {
540 /* skip over the vnodes that are being reclaimed */
541 simple_unlock(&vp->v_interlock);
542 reclaimhits++;
543 } else
544 break;
545 }
546 }
547
548 /*
549 * Unless this is a bad time of the month, at most
550 * the first NCPUS items on the free list are
551 * locked, so this is close enough to being empty.
552 */
553 if (vp == NULLVP) {
554 simple_unlock(&vnode_free_list_slock);
555 if (!(didretry++) && (vnreclaim(vnodetarget) > 0))
556 goto retry;
557 tablefull("vnode");
558 log(LOG_EMERG, "%d vnodes locked, %d desired, %d numvnodes, "
559 "%d free, %d inactive, %d being reclaimed\n",
560 cnt, desiredvnodes, numvnodes, freevnodes, inactivevnodes,
561 reclaimhits);
562 *vpp = 0;
563 return (ENFILE);
564 }
565
566 if (vp->v_usecount)
567 panic("free vnode isn't: v_type = %d, v_usecount = %d?",
568 vp->v_type, vp->v_usecount);
569
570 VREMFREE("getnewvnode", vp);
571 reused++;
572 simple_unlock(&vnode_free_list_slock);
573 vp->v_lease = NULL;
574 cache_purge(vp);
575 if (vp->v_type != VBAD)
576 vgonel(vp, p); /* clean and reclaim the vnode */
577 else
578 simple_unlock(&vp->v_interlock);
579#if DIAGNOSTIC
580 if (vp->v_data)
581 panic("cleaned vnode isn't");
582 {
583 int s = splbio();
584 if (vp->v_numoutput)
585 panic("Clean vnode has pending I/O's");
586 splx(s);
587 }
588#endif
589 if (UBCINFOEXISTS(vp))
590 panic("getnewvnode: ubcinfo not cleaned");
591 else
592 vp->v_ubcinfo = 0;
593
594 vp->v_lastr = -1;
595 vp->v_ralen = 0;
596 vp->v_maxra = 0;
597 vp->v_lastw = 0;
598 vp->v_ciosiz = 0;
599 vp->v_cstart = 0;
600 vp->v_clen = 0;
601 vp->v_socket = 0;
602
603done:
604 vp->v_flag = VSTANDARD;
605 vp->v_type = VNON;
606 vp->v_tag = tag;
607 vp->v_op = vops;
608 insmntque(vp, mp);
609 *vpp = vp;
610 vp->v_usecount = 1;
611 vp->v_data = 0;
612 return (0);
613}
614
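/*
 * Editorial sketch, not part of the original source: a typical caller,
 * modelled on bdevvp() later in this file -- request a fresh vnode with
 * the desired tag, mount point and vnode operations vector, then finish
 * initializing it.
 */
#if 0	/* illustrative only */
	struct vnode *nvp;
	int error;

	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error)
		return (error);
	nvp->v_type = VBLK;
#endif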
615/*
616 * Move a vnode from one mount queue to another.
617 */
618void
619insmntque(vp, mp)
620 struct vnode *vp;
621 struct mount *mp;
622{
623
624 simple_lock(&mntvnode_slock);
625 /*
626 * Delete from old mount point vnode list, if on one.
627 */
628 if (vp->v_mount != NULL)
629 LIST_REMOVE(vp, v_mntvnodes);
630 /*
631 * Insert into list of vnodes for the new mount point, if available.
632 */
633 if ((vp->v_mount = mp) != NULL)
634 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
635 simple_unlock(&mntvnode_slock);
636}
637
638__inline void
639vpwakeup(struct vnode *vp)
640{
641 if (vp) {
642 if (--vp->v_numoutput < 0)
643 panic("vpwakeup: neg numoutput");
644 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
645 if (vp->v_numoutput < 0)
646 panic("vpwakeup: neg numoutput 2");
647 vp->v_flag &= ~VBWAIT;
648 wakeup((caddr_t)&vp->v_numoutput);
649 }
650 }
651}
652
653/*
654 * Update outstanding I/O count and do wakeup if requested.
655 */
656void
657vwakeup(bp)
658 register struct buf *bp;
659{
660 register struct vnode *vp;
661
662 CLR(bp->b_flags, B_WRITEINPROG);
fa4905b1 663 vpwakeup(bp->b_vp);
664}
665
666/*
667 * Flush out and invalidate all buffers associated with a vnode.
668 * Called with the underlying object locked.
669 */
670int
671vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
672 register struct vnode *vp;
673 int flags;
674 struct ucred *cred;
675 struct proc *p;
676 int slpflag, slptimeo;
677{
678 register struct buf *bp;
679 struct buf *nbp, *blist;
680 int s, error = 0;
681
682 if (flags & V_SAVE) {
683 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) {
684 return (error);
685 }
686 if (vp->v_dirtyblkhd.lh_first != NULL || (vp->v_flag & VHASDIRTY))
687 panic("vinvalbuf: dirty bufs");
688 }
689
690 for (;;) {
691 if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
692 while (blist && blist->b_lblkno < 0)
693 blist = blist->b_vnbufs.le_next;
694 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
695 (flags & V_SAVEMETA))
696 while (blist && blist->b_lblkno < 0)
697 blist = blist->b_vnbufs.le_next;
698 if (!blist)
699 break;
700
701 for (bp = blist; bp; bp = nbp) {
702 nbp = bp->b_vnbufs.le_next;
703 if (flags & V_SAVEMETA && bp->b_lblkno < 0)
704 continue;
705 s = splbio();
706 if (ISSET(bp->b_flags, B_BUSY)) {
707 SET(bp->b_flags, B_WANTED);
708 error = tsleep((caddr_t)bp,
709 slpflag | (PRIBIO + 1), "vinvalbuf",
710 slptimeo);
711 splx(s);
712 if (error) {
713 return (error);
714 }
715 break;
716 }
717 bremfree(bp);
718 SET(bp->b_flags, B_BUSY);
719 splx(s);
720 /*
721 * XXX Since there are no node locks for NFS, I believe
722 * there is a slight chance that a delayed write will
723 * occur while sleeping just above, so check for it.
724 */
725 if (ISSET(bp->b_flags, B_DELWRI) && (flags & V_SAVE)) {
726 (void) VOP_BWRITE(bp);
727 break;
728 }
729 SET(bp->b_flags, B_INVAL);
730 brelse(bp);
731 }
732 }
733 if (!(flags & V_SAVEMETA) &&
734 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
735 panic("vinvalbuf: flush failed");
736 return (0);
737}
738
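/*
 * Editorial note, not part of the original source: V_SAVE forces dirty
 * buffers to disk (via VOP_FSYNC) before the invalidation, V_SAVEMETA
 * preserves buffers with negative logical block numbers (indirect
 * blocks), and slpflag/slptimeo are passed to tsleep() while waiting on
 * busy buffers.  vclean() later in this file uses it as:
 */
#if 0	/* illustrative only */
	vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
#endif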
739/*
740 * Associate a buffer with a vnode.
741 */
742void
743bgetvp(vp, bp)
744 register struct vnode *vp;
745 register struct buf *bp;
746{
747
748 if (bp->b_vp)
749 panic("bgetvp: not free");
750 VHOLD(vp);
751 bp->b_vp = vp;
752 if (vp->v_type == VBLK || vp->v_type == VCHR)
753 bp->b_dev = vp->v_rdev;
754 else
755 bp->b_dev = NODEV;
756 /*
757 * Insert onto list for new vnode.
758 */
759 bufinsvn(bp, &vp->v_cleanblkhd);
760}
761
762/*
763 * Disassociate a buffer from a vnode.
764 */
765void
766brelvp(bp)
767 register struct buf *bp;
768{
769 struct vnode *vp;
770
771 if (bp->b_vp == (struct vnode *) 0)
772 panic("brelvp: NULL");
773 /*
774 * Delete from old vnode list, if on one.
775 */
776 if (bp->b_vnbufs.le_next != NOLIST)
777 bufremvn(bp);
778 vp = bp->b_vp;
779 bp->b_vp = (struct vnode *) 0;
780 HOLDRELE(vp);
781}
782
783/*
784 * Reassign a buffer from one vnode to another.
785 * Used to assign file specific control information
786 * (indirect blocks) to the vnode to which they belong.
787 */
788void
789reassignbuf(bp, newvp)
790 register struct buf *bp;
791 register struct vnode *newvp;
792{
793 register struct buflists *listheadp;
794
795 if (newvp == NULL) {
796 printf("reassignbuf: NULL");
797 return;
798 }
799 /*
800 * Delete from old vnode list, if on one.
801 */
802 if (bp->b_vnbufs.le_next != NOLIST)
803 bufremvn(bp);
804 /*
805 * If dirty, put on list of dirty buffers;
806 * otherwise insert onto list of clean buffers.
807 */
808 if (ISSET(bp->b_flags, B_DELWRI))
809 listheadp = &newvp->v_dirtyblkhd;
810 else
811 listheadp = &newvp->v_cleanblkhd;
812 bufinsvn(bp, listheadp);
813}
814
815/*
816 * Create a vnode for a block device.
817 * Used for root filesystem, argdev, and swap areas.
818 * Also used for memory file system special devices.
819 */
820int
821bdevvp(dev, vpp)
822 dev_t dev;
823 struct vnode **vpp;
824{
825 register struct vnode *vp;
826 struct vnode *nvp;
827 int error;
828
829 if (dev == NODEV) {
830 *vpp = NULLVP;
831 return (ENODEV);
832 }
833 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
834 if (error) {
835 *vpp = NULLVP;
836 return (error);
837 }
838 vp = nvp;
839 vp->v_type = VBLK;
840 if (nvp = checkalias(vp, dev, (struct mount *)0)) {
841 vput(vp);
842 vp = nvp;
843 }
844 *vpp = vp;
845 return (0);
846}
847
848/*
849 * Check to see if the new vnode represents a special device
850 * for which we already have a vnode (either because of
851 * bdevvp() or because of a different vnode representing
852 * the same block device). If such an alias exists, deallocate
853 * the existing contents and return the aliased vnode. The
854 * caller is responsible for filling it with its new contents.
855 */
856struct vnode *
857checkalias(nvp, nvp_rdev, mp)
858 register struct vnode *nvp;
859 dev_t nvp_rdev;
860 struct mount *mp;
861{
862 struct proc *p = current_proc(); /* XXX */
863 struct vnode *vp;
864 struct vnode **vpp;
865 struct specinfo * bufhold;
866 int buffree = 1;
867
868 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
869 return (NULLVP);
870
871 bufhold = (struct specinfo *)_MALLOC_ZONE(sizeof(struct specinfo),
872 M_VNODE, M_WAITOK);
873 vpp = &speclisth[SPECHASH(nvp_rdev)];
874loop:
875 simple_lock(&spechash_slock);
876 for (vp = *vpp; vp; vp = vp->v_specnext) {
877 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
878 continue;
879 /*
880 * Alias, but not in use, so flush it out.
881 */
882 simple_lock(&vp->v_interlock);
883 if (vp->v_usecount == 0) {
884 simple_unlock(&spechash_slock);
885 vgonel(vp, p);
886 goto loop;
887 }
888 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
889 simple_unlock(&spechash_slock);
890 goto loop;
891 }
892 break;
893 }
894 if (vp == NULL || vp->v_tag != VT_NON) {
895 nvp->v_specinfo = bufhold;
896 buffree = 0; /* buffer used */
897 bzero(nvp->v_specinfo, sizeof(struct specinfo));
898 nvp->v_rdev = nvp_rdev;
899 nvp->v_hashchain = vpp;
900 nvp->v_specnext = *vpp;
901 nvp->v_specflags = 0;
902 simple_unlock(&spechash_slock);
903 *vpp = nvp;
904 if (vp != NULLVP) {
905 nvp->v_flag |= VALIASED;
906 vp->v_flag |= VALIASED;
907 vput(vp);
908 }
909 /* Since buffer is used just return */
910 return (NULLVP);
911 }
912 simple_unlock(&spechash_slock);
913 VOP_UNLOCK(vp, 0, p);
914 simple_lock(&vp->v_interlock);
915 vclean(vp, 0, p);
916 vp->v_op = nvp->v_op;
917 vp->v_tag = nvp->v_tag;
918 nvp->v_type = VNON;
919 insmntque(vp, mp);
920 if (buffree)
921 _FREE_ZONE((void *)bufhold, sizeof (struct specinfo), M_VNODE);
922 return (vp);
923}
924
925/*
926 * Get a reference on a particular vnode and lock it if requested.
927 * If the vnode was on the inactive list, remove it from the list.
928 * If the vnode was on the free list, remove it from the list and
929 * move it to inactive list as needed.
930 * The vnode lock bit is set if the vnode is being eliminated in
931 * vgone. The process is awakened when the transition is completed,
932 * and an error returned to indicate that the vnode is no longer
933 * usable (possibly having been changed to a new file system type).
934 */
935int
936vget(vp, flags, p)
937 struct vnode *vp;
938 int flags;
939 struct proc *p;
940{
941 int error = 0;
942
943 /*
944 * If the vnode is in the process of being cleaned out for
945 * another use, we wait for the cleaning to finish and then
946 * return failure. Cleaning is determined by checking that
947 * the VXLOCK flag is set.
948 */
949 if ((flags & LK_INTERLOCK) == 0)
950 simple_lock(&vp->v_interlock);
951 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
952 vp->v_flag |= VXWANT;
953 simple_unlock(&vp->v_interlock);
0b4e3aa0 954 (void)tsleep((caddr_t)vp, PINOD, "vget", 0);
955 return (ENOENT);
956 }
957
958 /*
959 * vnode is being terminated.
960 * wait for vnode_pager_no_senders() to clear VTERMINATE
961 */
962 if (ISSET(vp->v_flag, VTERMINATE)) {
963 SET(vp->v_flag, VTERMWANT);
964 simple_unlock(&vp->v_interlock);
0b4e3aa0 965 (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vclean", 0);
966 return (ENOENT);
967 }
968
969 simple_lock(&vnode_free_list_slock);
1c79356b 970 if (vp->v_usecount == 0) {
0b4e3aa0 971 /* If on the free list, remove it from there */
972 if (VONLIST(vp))
973 VREMFREE("vget", vp);
974 } else {
975 /* If on the inactive list, remove it from there */
976 if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
977 if (VONLIST(vp))
978 VREMINACTIVE("vget", vp);
979 }
980 }
981
982 /* The vnode should not be on the inactive list here */
983 VINACTIVECHECK("vget", vp, 0);
984
985 simple_unlock(&vnode_free_list_slock);
0b4e3aa0 986
987 if (++vp->v_usecount <= 0)
988 panic("vget: v_usecount");
989
990 /*
991 * Recover named reference as needed
992 */
993 if (UBCISVALID(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) {
994 simple_unlock(&vp->v_interlock);
995 if (ubc_getobject(vp, UBC_HOLDOBJECT)) {
996 error = ENOENT;
997 goto errout;
1c79356b 998 }
999 simple_lock(&vp->v_interlock);
1000 }
1001
1002 if (flags & LK_TYPE_MASK) {
1003 if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
1004 goto errout;
1005 return (0);
1006 }
1007
1008 if ((flags & LK_INTERLOCK) == 0)
1009 simple_unlock(&vp->v_interlock);
1010 return (0);
1011
1012errout:
1c79356b 1013 /*
1014 * If the vnode was not active in the first place
1015 * must not call vrele() as VOP_INACTIVE() is not
1016 * required.
1017 * So inlined part of vrele() here.
1c79356b 1018 */
1019 simple_lock(&vp->v_interlock);
1020 if (--vp->v_usecount == 1) {
1021 if (UBCINFOEXISTS(vp)) {
1022 vinactive(vp);
1c79356b 1023 simple_unlock(&vp->v_interlock);
0b4e3aa0 1024 return (error);
1c79356b 1025 }
1c79356b 1026 }
0b4e3aa0 1027 if (vp->v_usecount > 0) {
1c79356b 1028 simple_unlock(&vp->v_interlock);
1029 return (error);
1030 }
1031 if (vp->v_usecount < 0)
1032 panic("vget: negative usecount (%d)", vp->v_usecount);
1033 vfree(vp);
1034 simple_unlock(&vp->v_interlock);
1035 return (error);
1036}
1037
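/*
 * Editorial sketch, not part of the original source: a reference taken
 * with a lock type (e.g. LK_EXCLUSIVE) leaves the vnode locked on
 * success and is normally dropped with vput(), which unlocks and
 * releases; a reference taken without LK_TYPE_MASK bits is dropped with
 * vrele().
 */
#if 0	/* illustrative only */
	if (vget(vp, LK_EXCLUSIVE, p) == 0) {
		/* ... operate on the locked vnode ... */
		vput(vp);
	}
#endif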
1038/*
1039 * Get a pager reference on the particular vnode.
1040 *
 1041 * This is called from ubc_info_init() and it is assumed that
 1042 * the vnode is neither on the free list nor on the inactive list.
1043 * It is also assumed that the vnode is neither being recycled
1044 * by vgonel nor being terminated by vnode_pager_vrele().
1045 *
1046 * The vnode interlock is NOT held by the caller.
1047 */
1048__private_extern__ int
1049vnode_pager_vget(vp)
1050 struct vnode *vp;
1051{
1052 simple_lock(&vp->v_interlock);
1053 if (UBCINFOMISSING(vp))
1054 panic("vnode_pager_vget: stolen ubc_info");
1055
1056 if (!UBCINFOEXISTS(vp))
1057 panic("vnode_pager_vget: lost ubc_info");
1058
1059 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM))
1060 panic("vnode_pager_vget: already being reclaimd");
1061
1062 if (ISSET(vp->v_flag, VTERMINATE))
1063 panic("vnode_pager_vget: already being terminated");
1064
1065 simple_lock(&vnode_free_list_slock);
1066 /* The vnode should not be on ANY list */
1067 if (VONLIST(vp))
1068 panic("vnode_pager_vget: still on the list");
1069
1070 /* The vnode should not be on the inactive list here */
1071 VINACTIVECHECK("vnode_pager_vget", vp, 0);
1072 simple_unlock(&vnode_free_list_slock);
1073
1074 /* After all those checks, now do the real work :-) */
1075 if (++vp->v_usecount <= 0)
1076 panic("vnode_pager_vget: v_usecount");
1077 simple_unlock(&vp->v_interlock);
1078
1079 return (0);
1080}
1081
1082/*
1083 * Stubs to use when there is no locking to be done on the underlying object.
1084 * A minimal shared lock is necessary to ensure that the underlying object
1085 * is not revoked while an operation is in progress. So, an active shared
 1086 * count is maintained in an auxiliary vnode lock structure.
1087 */
1088int
1089vop_nolock(ap)
1090 struct vop_lock_args /* {
1091 struct vnode *a_vp;
1092 int a_flags;
1093 struct proc *a_p;
1094 } */ *ap;
1095{
1096#ifdef notyet
1097 /*
1098 * This code cannot be used until all the non-locking filesystems
1099 * (notably NFS) are converted to properly lock and release nodes.
1100 * Also, certain vnode operations change the locking state within
1101 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
1102 * and symlink). Ideally these operations should not change the
1103 * lock state, but should be changed to let the caller of the
1104 * function unlock them. Otherwise all intermediate vnode layers
1105 * (such as union, umapfs, etc) must catch these functions to do
1106 * the necessary locking at their layer. Note that the inactive
1107 * and lookup operations also change their lock state, but this
1108 * cannot be avoided, so these two operations will always need
1109 * to be handled in intermediate layers.
1110 */
1111 struct vnode *vp = ap->a_vp;
1112 int vnflags, flags = ap->a_flags;
1113
1114 if (vp->v_vnlock == NULL) {
1115 if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1116 return (0);
1117 MALLOC_ZONE(vp->v_vnlock, struct lock__bsd__ *,
1118 sizeof(struct lock__bsd__), M_VNODE, M_WAITOK);
1119 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
1120 }
1121 switch (flags & LK_TYPE_MASK) {
1122 case LK_DRAIN:
1123 vnflags = LK_DRAIN;
1124 break;
1125 case LK_EXCLUSIVE:
1126 case LK_SHARED:
1127 vnflags = LK_SHARED;
1128 break;
1129 case LK_UPGRADE:
1130 case LK_EXCLUPGRADE:
1131 case LK_DOWNGRADE:
1132 return (0);
1133 case LK_RELEASE:
1134 default:
1135 panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
1136 }
1137 if (flags & LK_INTERLOCK)
1138 vnflags |= LK_INTERLOCK;
1139 return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
1140#else /* for now */
1141 /*
1142 * Since we are not using the lock manager, we must clear
1143 * the interlock here.
1144 */
1145 if (ap->a_flags & LK_INTERLOCK)
1146 simple_unlock(&ap->a_vp->v_interlock);
1147 return (0);
1148#endif
1149}
1150
1151/*
1152 * Decrement the active use count.
1153 */
1154int
1155vop_nounlock(ap)
1156 struct vop_unlock_args /* {
1157 struct vnode *a_vp;
1158 int a_flags;
1159 struct proc *a_p;
1160 } */ *ap;
1161{
1162 struct vnode *vp = ap->a_vp;
1163
1164 if (vp->v_vnlock == NULL)
1165 return (0);
1166 return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
1167}
1168
1169/*
1170 * Return whether or not the node is in use.
1171 */
1172int
1173vop_noislocked(ap)
1174 struct vop_islocked_args /* {
1175 struct vnode *a_vp;
1176 } */ *ap;
1177{
1178 struct vnode *vp = ap->a_vp;
1179
1180 if (vp->v_vnlock == NULL)
1181 return (0);
1182 return (lockstatus(vp->v_vnlock));
1183}
1184
1185/*
1186 * Vnode reference.
1187 */
1188void
1189vref(vp)
1190 struct vnode *vp;
1191{
1192
1193 simple_lock(&vp->v_interlock);
1194 if (vp->v_usecount <= 0)
1195 panic("vref used where vget required");
1196
1197 /* If on the inactive list, remove it from there */
1198 if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) {
1199 if (VONLIST(vp)) {
1200 simple_lock(&vnode_free_list_slock);
1201 VREMINACTIVE("vref", vp);
1202 simple_unlock(&vnode_free_list_slock);
1203 }
1204 }
1205 /* The vnode should not be on the inactive list here */
1206 VINACTIVECHECK("vref", vp, 0);
1207
1208 if (++vp->v_usecount <= 0)
1209 panic("vref v_usecount");
1210 simple_unlock(&vp->v_interlock);
1211}
1212
1213/*
1214 * put the vnode on appropriate free list.
1215 * called with v_interlock held.
1216 */
1217static void
1218vfree(vp)
1219 struct vnode *vp;
1220{
1221 /*
1222 * if the vnode is not obtained by calling getnewvnode() we
1223 * are not responsible for the cleanup. Just return.
1224 */
1225 if (!(vp->v_flag & VSTANDARD)) {
1226 return;
1227 }
1228
1229 if (vp->v_usecount != 0)
1230 panic("vfree: v_usecount");
1231
1232 /* insert at tail of LRU list or at head if VAGE is set */
1233 simple_lock(&vnode_free_list_slock);
1234
1235 if (VONLIST(vp))
1236 panic("vfree: vnode still on list");
1237
1238 if (vp->v_flag & VAGE) {
1239 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1240 vp->v_flag &= ~VAGE;
1241 } else
1242 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1243 freevnodes++;
1244 simple_unlock(&vnode_free_list_slock);
1245 return;
1246}
1247
1248/*
1249 * put the vnode on the inactive list.
1250 * called with v_interlock held
1251 */
1252static void
1253vinactive(vp)
1254 struct vnode *vp;
1255{
1256 if (!UBCINFOEXISTS(vp))
1257 panic("vinactive: not a UBC vnode");
1258
1259 if (vp->v_usecount != 1)
1260 panic("vinactive: v_usecount");
1261
1262 simple_lock(&vnode_free_list_slock);
1263
1264 if (VONLIST(vp))
1265 panic("vinactive: vnode still on list");
1266 VINACTIVECHECK("vinactive", vp, 0);
1267
1268 TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
1269 SET(vp->v_flag, VUINACTIVE);
1270 CLR(vp->v_flag, (VNOCACHE_DATA | VRAOFF));
1271
1272 inactivevnodes++;
1273 simple_unlock(&vnode_free_list_slock);
1274 return;
1275}
1276
1277
1278/*
1279 * vput(), just unlock and vrele()
1280 */
1281void
1282vput(vp)
1283 struct vnode *vp;
1284{
1285 struct proc *p = current_proc(); /* XXX */
1286
1287 simple_lock(&vp->v_interlock);
1288 if (--vp->v_usecount == 1) {
1289 if (UBCINFOEXISTS(vp)) {
1290 vinactive(vp);
1291 simple_unlock(&vp->v_interlock);
1292 VOP_UNLOCK(vp, 0, p);
1293 return;
1294 }
1295 }
1296 if (vp->v_usecount > 0) {
1297 simple_unlock(&vp->v_interlock);
1298 VOP_UNLOCK(vp, 0, p);
1299 return;
1300 }
1301#if DIAGNOSTIC
1302 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1303 vprint("vput: bad ref count", vp);
1304 panic("vput: v_usecount = %d, v_writecount = %d",
1305 vp->v_usecount, vp->v_writecount);
1306 }
1307#endif
1308 if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1309 VREMINACTIVE("vrele", vp);
1310
1311 simple_unlock(&vp->v_interlock);
1312 VOP_INACTIVE(vp, p);
1313 /*
 1314 * The interlock is not held and
 1315 * VOP_INACTIVE releases the vnode lock.
 1316 * We could block and the vnode might get reactivated,
 1317 * so we cannot just call vfree() without checking the state.
1318 */
1319 simple_lock(&vp->v_interlock);
1320 if (!VONLIST(vp)) {
1321 if (vp->v_usecount == 0)
1322 vfree(vp);
1323 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1324 vinactive(vp);
1325 }
1326 simple_unlock(&vp->v_interlock);
1327}
1328
1329/*
1330 * Vnode release.
1331 * If count drops to zero, call inactive routine and return to freelist.
1332 */
1333void
1334vrele(vp)
1335 struct vnode *vp;
1336{
1337 struct proc *p = current_proc(); /* XXX */
1338
1339 simple_lock(&vp->v_interlock);
1340 if (--vp->v_usecount == 1) {
1341 if (UBCINFOEXISTS(vp)) {
1342 vinactive(vp);
1343 simple_unlock(&vp->v_interlock);
1344 return;
1345 }
1346 }
1347 if (vp->v_usecount > 0) {
1348 simple_unlock(&vp->v_interlock);
1349 return;
1350 }
1351#if DIAGNOSTIC
1352 if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1353 vprint("vrele: bad ref count", vp);
1354 panic("vrele: ref cnt");
1355 }
1356#endif
1357 if (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))
1358 VREMINACTIVE("vrele", vp);
1359
1360
1361 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
1362 /* vnode is being cleaned, just return */
1363 vfree(vp);
1364 simple_unlock(&vp->v_interlock);
1365 return;
1366 }
1367
1368 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
1369 VOP_INACTIVE(vp, p);
1370 /*
 1371 * vn_lock releases the interlock and
 1372 * VOP_INACTIVE releases the vnode lock.
 1373 * We could block and the vnode might get reactivated,
 1374 * so we cannot just call vfree() without checking the state.
1375 */
1376 simple_lock(&vp->v_interlock);
1377 if (!VONLIST(vp)) {
1378 if (vp->v_usecount == 0)
1379 vfree(vp);
1380 else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp))
1381 vinactive(vp);
1382 }
1383 simple_unlock(&vp->v_interlock);
1384 }
1385#if 0
1386 else {
1387 vfree(vp);
1388 simple_unlock(&vp->v_interlock);
1389 kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp);
1390 }
1391#endif
1392}
1393
1394void
1395vagevp(vp)
1396 struct vnode *vp;
1397{
1398 simple_lock(&vp->v_interlock);
1399 vp->v_flag |= VAGE;
1400 simple_unlock(&vp->v_interlock);
1401 return;
1402}
1403
1404/*
1405 * Page or buffer structure gets a reference.
1406 */
1407void
1408vhold(vp)
1409 register struct vnode *vp;
1410{
1411
1412 simple_lock(&vp->v_interlock);
1413 vp->v_holdcnt++;
1414 simple_unlock(&vp->v_interlock);
1415}
1416
1417/*
1418 * Page or buffer structure frees a reference.
1419 */
1420void
1421holdrele(vp)
1422 register struct vnode *vp;
1423{
1424
1425 simple_lock(&vp->v_interlock);
1426 if (vp->v_holdcnt <= 0)
1427 panic("holdrele: holdcnt");
1428 vp->v_holdcnt--;
1429 simple_unlock(&vp->v_interlock);
1430}
1431
1432/*
1433 * Remove any vnodes in the vnode table belonging to mount point mp.
1434 *
1435 * If MNT_NOFORCE is specified, there should not be any active ones,
1436 * return error if any are found (nb: this is a user error, not a
1437 * system error). If MNT_FORCE is specified, detach any active vnodes
1438 * that are found.
1439 */
1440#if DIAGNOSTIC
1441int busyprt = 0; /* print out busy vnodes */
1442#if 0
1443struct ctldebug debug1 = { "busyprt", &busyprt };
1444#endif /* 0 */
1445#endif
1446
1447int
1448vflush(mp, skipvp, flags)
1449 struct mount *mp;
1450 struct vnode *skipvp;
1451 int flags;
1452{
0b4e3aa0 1453 struct proc *p = current_proc();
1454 struct vnode *vp, *nvp;
1455 int busy = 0;
1456
1457 simple_lock(&mntvnode_slock);
1458loop:
1459 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1460 if (vp->v_mount != mp)
1461 goto loop;
1462 nvp = vp->v_mntvnodes.le_next;
1463 /*
1464 * Skip over a selected vnode.
1465 */
1466 if (vp == skipvp)
1467 continue;
1468
1469 simple_lock(&vp->v_interlock);
1470 /*
 1471 * Skip over vnodes marked VSYSTEM.
1472 */
1473 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1474 simple_unlock(&vp->v_interlock);
1475 continue;
1476 }
1477 /*
 1478 * Skip over vnodes marked VSWAP.
1479 */
1480 if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
1481 simple_unlock(&vp->v_interlock);
1482 continue;
1483 }
1484 /*
1485 * If WRITECLOSE is set, only flush out regular file
1486 * vnodes open for writing.
1487 */
1488 if ((flags & WRITECLOSE) &&
1489 (vp->v_writecount == 0 || vp->v_type != VREG)) {
1490 simple_unlock(&vp->v_interlock);
1491 continue;
1492 }
1493 /*
1494 * With v_usecount == 0, all we need to do is clear
1495 * out the vnode data structures and we are done.
1496 */
1497 if (vp->v_usecount == 0) {
1498 simple_unlock(&mntvnode_slock);
1499 vgonel(vp, p);
1500 simple_lock(&mntvnode_slock);
1501 continue;
1502 }
1503 /*
1504 * If FORCECLOSE is set, forcibly close the vnode.
1505 * For block or character devices, revert to an
1506 * anonymous device. For all other files, just kill them.
1507 */
1508 if (flags & FORCECLOSE) {
1509 simple_unlock(&mntvnode_slock);
1510 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1511 vgonel(vp, p);
1512 } else {
1513 vclean(vp, 0, p);
1514 vp->v_op = spec_vnodeop_p;
1515 insmntque(vp, (struct mount *)0);
1516 }
1517 simple_lock(&mntvnode_slock);
1518 continue;
1519 }
1520#if DIAGNOSTIC
1521 if (busyprt)
1522 vprint("vflush: busy vnode", vp);
1523#endif
1524 simple_unlock(&vp->v_interlock);
1525 busy++;
1526 }
1527 simple_unlock(&mntvnode_slock);
1528 if (busy)
1529 return (EBUSY);
1530 return (0);
1531}
1532
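/*
 * Editorial sketch, not part of the original source, and the unmount
 * caller shown is an assumption: a filesystem unmount typically flushes
 * every vnode on the mount, adding FORCECLOSE only for forced unmounts.
 */
#if 0	/* illustrative only */
	int flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;

	if (error = vflush(mp, NULLVP, flags))
		return (error);
#endif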
1533/*
1534 * Disassociate the underlying file system from a vnode.
1535 * The vnode interlock is held on entry.
1536 */
1537static void
1538vclean(vp, flags, p)
1539 struct vnode *vp;
1540 int flags;
1541 struct proc *p;
1542{
1543 int active;
1544 void *obj;
0b4e3aa0 1545 kern_return_t kret;
1c79356b 1546 int removed = 0;
0b4e3aa0 1547 int didhold;
1548
1549 /*
1550 * if the vnode is not obtained by calling getnewvnode() we
1551 * are not responsible for the cleanup. Just return.
1552 */
1553 if (!(vp->v_flag & VSTANDARD)) {
1554 simple_unlock(&vp->v_interlock);
1555 return;
1556 }
1557
1558 /*
1559 * Check to see if the vnode is in use.
1560 * If so we have to reference it before we clean it out
1561 * so that its count cannot fall to zero and generate a
1562 * race against ourselves to recycle it.
1563 */
1564 if (active = vp->v_usecount)
1565 if (++vp->v_usecount <= 0)
1566 panic("vclean: v_usecount");
1567 /*
1568 * Prevent the vnode from being recycled or
1569 * brought into use while we clean it out.
1570 */
1571 if (vp->v_flag & VXLOCK)
1572 panic("vclean: deadlock");
1573 vp->v_flag |= VXLOCK;
1574
1575 /*
1576 * Even if the count is zero, the VOP_INACTIVE routine may still
1577 * have the object locked while it cleans it out. The VOP_LOCK
1578 * ensures that the VOP_INACTIVE routine is done with its work.
1579 * For active vnodes, it ensures that no other activity can
1580 * occur while the underlying object is being cleaned out.
1581 */
1582 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1583
1584 /*
1585 * if this vnode is on the inactive list
1586 * take it off the list.
1587 */
1588 if ((active == 1) &&
1589 (ISSET((vp)->v_flag, VUINACTIVE) && VONLIST(vp))) {
1590 simple_lock(&vnode_free_list_slock);
1591 VREMINACTIVE("vclean", vp);
1592 simple_unlock(&vnode_free_list_slock);
1593 removed++;
1594 }
1595
 1596 /* Close the vnode if it is active and DOCLOSE was requested. */
1597 if (active && (flags & DOCLOSE))
1598 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
1599
1600 /* Clean the pages in VM. */
1601 didhold = ubc_hold(vp);
1602 if ((active) && (didhold))
1c79356b 1603 (void)ubc_clean(vp, 0); /* do not invalidate */
1604
1605 /*
1606 * Clean out any buffers associated with the vnode.
1607 */
1608 if (flags & DOCLOSE) {
1609 if (vp->v_tag == VT_NFS)
1610 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
1611 else
1612 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1613 }
1614
1615 if (active)
1c79356b 1616 VOP_INACTIVE(vp, p);
0b4e3aa0 1617 else
1c79356b 1618 VOP_UNLOCK(vp, 0, p);
1619
1620 /* Destroy ubc named reference */
1621 if (didhold) {
1622 ubc_rele(vp);
1623 ubc_destroy_named(vp);
1c79356b 1624 }
0b4e3aa0 1625
1626 /*
1627 * Reclaim the vnode.
1628 */
1629 if (VOP_RECLAIM(vp, p))
1630 panic("vclean: cannot reclaim");
1631 cache_purge(vp);
1632 if (vp->v_vnlock) {
1633 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1634 vprint("vclean: lock not drained", vp);
1635 FREE_ZONE(vp->v_vnlock, sizeof (struct lock__bsd__), M_VNODE);
1636 vp->v_vnlock = NULL;
1637 }
1638
1639 /* It's dead, Jim! */
1640 vp->v_op = dead_vnodeop_p;
1641 vp->v_tag = VT_NON;
1642
1643 /*
1644 * Done with purge, notify sleepers of the grim news.
1645 */
1646 vp->v_flag &= ~VXLOCK;
1647 if (vp->v_flag & VXWANT) {
1648 vp->v_flag &= ~VXWANT;
1649 wakeup((caddr_t)vp);
1650 }
1651
1652 if (active)
1653 vrele(vp);
1654}
1655
1656/*
1657 * Eliminate all activity associated with the requested vnode
1658 * and with all vnodes aliased to the requested vnode.
1659 */
1660int
1661vop_revoke(ap)
1662 struct vop_revoke_args /* {
1663 struct vnode *a_vp;
1664 int a_flags;
1665 } */ *ap;
1666{
1667 struct vnode *vp, *vq;
0b4e3aa0 1668 struct proc *p = current_proc();
1669
1670#if DIAGNOSTIC
1671 if ((ap->a_flags & REVOKEALL) == 0)
1672 panic("vop_revoke");
1673#endif
1674
1675 vp = ap->a_vp;
1676 simple_lock(&vp->v_interlock);
1677
1678 if (vp->v_flag & VALIASED) {
1679 /*
1680 * If a vgone (or vclean) is already in progress,
1681 * wait until it is done and return.
1682 */
1683 if (vp->v_flag & VXLOCK) {
1684 while (vp->v_flag & VXLOCK) {
1685 vp->v_flag |= VXWANT;
1686 simple_unlock(&vp->v_interlock);
0b4e3aa0 1687 (void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
1688 }
1689 return (0);
1690 }
1691 /*
1692 * Ensure that vp will not be vgone'd while we
1693 * are eliminating its aliases.
1694 */
1695 vp->v_flag |= VXLOCK;
1696 simple_unlock(&vp->v_interlock);
1697 while (vp->v_flag & VALIASED) {
1698 simple_lock(&spechash_slock);
1699 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1700 if (vq->v_rdev != vp->v_rdev ||
1701 vq->v_type != vp->v_type || vp == vq)
1702 continue;
1703 simple_unlock(&spechash_slock);
1704 vgone(vq);
1705 break;
1706 }
1707 if (vq == NULLVP)
1708 simple_unlock(&spechash_slock);
1709 }
1710 /*
1711 * Remove the lock so that vgone below will
1712 * really eliminate the vnode after which time
1713 * vgone will awaken any sleepers.
1714 */
1715 simple_lock(&vp->v_interlock);
1716 vp->v_flag &= ~VXLOCK;
1717 }
1718 vgonel(vp, p);
1719 return (0);
1720}
1721
1722/*
1723 * Recycle an unused vnode to the front of the free list.
1724 * Release the passed interlock if the vnode will be recycled.
1725 */
1726int
1727vrecycle(vp, inter_lkp, p)
1728 struct vnode *vp;
1729 struct slock *inter_lkp;
1730 struct proc *p;
1731{
1732
1733 simple_lock(&vp->v_interlock);
1734 if (vp->v_usecount == 0) {
1735 if (inter_lkp)
1736 simple_unlock(inter_lkp);
1737 vgonel(vp, p);
1738 return (1);
1739 }
1740 simple_unlock(&vp->v_interlock);
1741 return (0);
1742}
1743
1744/*
1745 * Eliminate all activity associated with a vnode
1746 * in preparation for reuse.
1747 */
1748void
1749vgone(vp)
1750 struct vnode *vp;
1751{
0b4e3aa0 1752 struct proc *p = current_proc();
1753
1754 simple_lock(&vp->v_interlock);
1755 vgonel(vp, p);
1756}
1757
1758/*
1759 * vgone, with the vp interlock held.
1760 */
1761void
1762vgonel(vp, p)
1763 struct vnode *vp;
1764 struct proc *p;
1765{
1766 struct vnode *vq;
1767 struct vnode *vx;
1768
1769 /*
1770 * if the vnode is not obtained by calling getnewvnode() we
1771 * are not responsible for the cleanup. Just return.
1772 */
1773 if (!(vp->v_flag & VSTANDARD)) {
1774 simple_unlock(&vp->v_interlock);
1775 return;
1776 }
1777
1778 /*
1779 * If a vgone (or vclean) is already in progress,
1780 * wait until it is done and return.
1781 */
1782 if (vp->v_flag & VXLOCK) {
1783 while (vp->v_flag & VXLOCK) {
1784 vp->v_flag |= VXWANT;
1785 simple_unlock(&vp->v_interlock);
0b4e3aa0 1786 (void)tsleep((caddr_t)vp, PINOD, "vgone", 0);
1787 }
1788 return;
1789 }
1790 /*
1791 * Clean out the filesystem specific data.
1792 */
1793 vclean(vp, DOCLOSE, p);
1794 /*
1795 * Delete from old mount point vnode list, if on one.
1796 */
1797 if (vp->v_mount != NULL)
1798 insmntque(vp, (struct mount *)0);
1799 /*
1800 * If special device, remove it from special device alias list
1801 * if it is on one.
1802 */
1803 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1804 simple_lock(&spechash_slock);
1805 if (*vp->v_hashchain == vp) {
1806 *vp->v_hashchain = vp->v_specnext;
1807 } else {
1808 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1809 if (vq->v_specnext != vp)
1810 continue;
1811 vq->v_specnext = vp->v_specnext;
1812 break;
1813 }
1814 if (vq == NULL)
1815 panic("missing bdev");
1816 }
1817 if (vp->v_flag & VALIASED) {
1818 vx = NULL;
1819 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1820 if (vq->v_rdev != vp->v_rdev ||
1821 vq->v_type != vp->v_type)
1822 continue;
1823 if (vx)
1824 break;
1825 vx = vq;
1826 }
1827 if (vx == NULL)
1828 panic("missing alias");
1829 if (vq == NULL)
1830 vx->v_flag &= ~VALIASED;
1831 vp->v_flag &= ~VALIASED;
1832 }
1833 simple_unlock(&spechash_slock);
1834 FREE_ZONE(vp->v_specinfo, sizeof (struct specinfo), M_VNODE);
1835 vp->v_specinfo = NULL;
1836 }
1837 /*
1838 * If it is on the freelist and not already at the head,
1839 * move it to the head of the list. The test of the back
1840 * pointer and the reference count of zero is because
1841 * it will be removed from the free list by getnewvnode,
1842 * but will not have its reference count incremented until
1843 * after calling vgone. If the reference count were
1844 * incremented first, vgone would (incorrectly) try to
1845 * close the previous instance of the underlying object.
1846 * So, the back pointer is explicitly set to `0xdeadb' in
1847 * getnewvnode after removing it from the freelist to ensure
1848 * that we do not try to move it here.
1849 */
1850 if (vp->v_usecount == 0) {
1851 simple_lock(&vnode_free_list_slock);
1852 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1853 vnode_free_list.tqh_first != vp) {
1854 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1855 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1856 }
1857 simple_unlock(&vnode_free_list_slock);
1858 }
1859 vp->v_type = VBAD;
1860}
1861
1862/*
1863 * Lookup a vnode by device number.
1864 */
1865int
1866vfinddev(dev, type, vpp)
1867 dev_t dev;
1868 enum vtype type;
1869 struct vnode **vpp;
1870{
1871 struct vnode *vp;
1872 int rc = 0;
1873
1874 simple_lock(&spechash_slock);
1875 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1876 if (dev != vp->v_rdev || type != vp->v_type)
1877 continue;
1878 *vpp = vp;
1879 rc = 1;
1880 break;
1881 }
1882 simple_unlock(&spechash_slock);
1883 return (rc);
1884}
1885
1886/*
1887 * Calculate the total number of references to a special device.
1888 */
1889int
1890vcount(vp)
1891 struct vnode *vp;
1892{
1893 struct vnode *vq, *vnext;
1894 int count;
1895
1896loop:
1897 if ((vp->v_flag & VALIASED) == 0)
1898 return (vp->v_usecount);
1899 simple_lock(&spechash_slock);
1900 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1901 vnext = vq->v_specnext;
1902 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1903 continue;
1904 /*
1905 * Alias, but not in use, so flush it out.
1906 */
1907 if (vq->v_usecount == 0 && vq != vp) {
1908 simple_unlock(&spechash_slock);
1909 vgone(vq);
1910 goto loop;
1911 }
1912 count += vq->v_usecount;
1913 }
1914 simple_unlock(&spechash_slock);
1915 return (count);
1916}
1917
1918int prtactive = 0; /* 1 => print out reclaim of active vnodes */
1919
1920/*
1921 * Print out a description of a vnode.
1922 */
1923static char *typename[] =
1924 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1925
1926void
1927vprint(label, vp)
1928 char *label;
1929 register struct vnode *vp;
1930{
1931 char buf[64];
1932
1933 if (label != NULL)
1934 printf("%s: ", label);
1935 printf("type %s, usecount %d, writecount %d, refcount %d,",
1936 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1937 vp->v_holdcnt);
1938 buf[0] = '\0';
1939 if (vp->v_flag & VROOT)
1940 strcat(buf, "|VROOT");
1941 if (vp->v_flag & VTEXT)
1942 strcat(buf, "|VTEXT");
1943 if (vp->v_flag & VSYSTEM)
1944 strcat(buf, "|VSYSTEM");
1945 if (vp->v_flag & VXLOCK)
1946 strcat(buf, "|VXLOCK");
1947 if (vp->v_flag & VXWANT)
1948 strcat(buf, "|VXWANT");
1949 if (vp->v_flag & VBWAIT)
1950 strcat(buf, "|VBWAIT");
1951 if (vp->v_flag & VALIASED)
1952 strcat(buf, "|VALIASED");
1953 if (buf[0] != '\0')
1954 printf(" flags (%s)", &buf[1]);
1955 if (vp->v_data == NULL) {
1956 printf("\n");
1957 } else {
1958 printf("\n\t");
1959 VOP_PRINT(vp);
1960 }
1961}
1962
1963#ifdef DEBUG
1964/*
1965 * List all of the locked vnodes in the system.
1966 * Called when debugging the kernel.
1967 */
1968void
1969printlockedvnodes()
1970{
0b4e3aa0 1971 struct proc *p = current_proc();
1972 struct mount *mp, *nmp;
1973 struct vnode *vp;
1974
1975 printf("Locked vnodes\n");
1976 simple_lock(&mountlist_slock);
1977 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1978 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1979 nmp = mp->mnt_list.cqe_next;
1980 continue;
1981 }
1982 for (vp = mp->mnt_vnodelist.lh_first;
1983 vp != NULL;
1984 vp = vp->v_mntvnodes.le_next) {
1985 if (VOP_ISLOCKED(vp))
1986 vprint((char *)0, vp);
1987 }
1988 simple_lock(&mountlist_slock);
1989 nmp = mp->mnt_list.cqe_next;
1990 vfs_unbusy(mp, p);
1991 }
1992 simple_unlock(&mountlist_slock);
1993}
1994#endif
1995
1996/*
1997 * Top level filesystem related information gathering.
1998 */
1999int
2000vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
2001 int *name;
2002 u_int namelen;
2003 void *oldp;
2004 size_t *oldlenp;
2005 void *newp;
2006 size_t newlen;
2007 struct proc *p;
2008{
2009 struct ctldebug *cdp;
2010 struct vfsconf *vfsp;
2011
2012 if (name[0] == VFS_NUMMNTOPS) {
2013 extern unsigned int vfs_nummntops;
2014 return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
2015 }
2016
2017 /* all sysctl names at this level are at least name and field */
2018 if (namelen < 2)
2019 return (ENOTDIR); /* overloaded */
2020 if (name[0] != VFS_GENERIC) {
2021 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2022 if (vfsp->vfc_typenum == name[0])
2023 break;
2024 if (vfsp == NULL)
2025 return (EOPNOTSUPP);
2026 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
2027 oldp, oldlenp, newp, newlen, p));
2028 }
2029 switch (name[1]) {
2030 case VFS_MAXTYPENUM:
2031 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
2032 case VFS_CONF:
2033 if (namelen < 3)
2034 return (ENOTDIR); /* overloaded */
2035 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2036 if (vfsp->vfc_typenum == name[2])
2037 break;
2038 if (vfsp == NULL)
2039 return (EOPNOTSUPP);
2040 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
2041 sizeof(struct vfsconf)));
2042 }
2043 return (EOPNOTSUPP);
2044}
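#if 0
	/*
	 * Usage sketch (user-level caller, not part of this file): the
	 * VFS_GENERIC/VFS_CONF case above can be reached through sysctl(3)
	 * with a CTL_VFS MIB; "typenum" is a hypothetical vfc_typenum value.
	 */
	int mib[4] = { CTL_VFS, VFS_GENERIC, VFS_CONF, typenum };
	struct vfsconf vfc;
	size_t len = sizeof(vfc);

	if (sysctl(mib, 4, &vfc, &len, NULL, 0) == 0)
		printf("%s: typenum %d, refcount %d\n",
		    vfc.vfc_name, vfc.vfc_typenum, vfc.vfc_refcount);
#endif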
2045
2046int kinfo_vdebug = 1;
2047#define KINFO_VNODESLOP 10
2048/*
2049 * Dump vnode list (via sysctl).
2050 * Copyout address of vnode followed by vnode.
2051 */
2052/* ARGSUSED */
2053int
2054sysctl_vnode(where, sizep, p)
2055 char *where;
2056 size_t *sizep;
2057 struct proc *p;
2058{
2059 struct mount *mp, *nmp;
2060 struct vnode *nvp, *vp;
2061 char *bp = where, *savebp;
2062 char *ewhere;
2063 int error;
2064
2065#define VPTRSZ sizeof (struct vnode *)
2066#define VNODESZ sizeof (struct vnode)
2067 if (where == NULL) {
2068 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2069 return (0);
2070 }
2071 ewhere = where + *sizep;
2072
2073 simple_lock(&mountlist_slock);
2074 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2075 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
2076 nmp = mp->mnt_list.cqe_next;
2077 continue;
2078 }
2079 savebp = bp;
2080again:
2081 simple_lock(&mntvnode_slock);
2082 for (vp = mp->mnt_vnodelist.lh_first;
2083 vp != NULL;
2084 vp = nvp) {
2085 /*
2086 * Check that the vp is still associated with
2087 * this filesystem. RACE: could have been
2088 * recycled onto the same filesystem.
2089 */
2090 if (vp->v_mount != mp) {
2091 simple_unlock(&mntvnode_slock);
2092 if (kinfo_vdebug)
2093 printf("kinfo: vp changed\n");
2094 bp = savebp;
2095 goto again;
2096 }
2097 nvp = vp->v_mntvnodes.le_next;
2098 if (bp + VPTRSZ + VNODESZ > ewhere) {
2099 simple_unlock(&mntvnode_slock);
2100 *sizep = bp - where;
2101 return (ENOMEM);
2102 }
2103 simple_unlock(&mntvnode_slock);
2104 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2105 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
2106 return (error);
2107 bp += VPTRSZ + VNODESZ;
2108 simple_lock(&mntvnode_slock);
2109 }
2110 simple_unlock(&mntvnode_slock);
2111 simple_lock(&mountlist_slock);
2112 nmp = mp->mnt_list.cqe_next;
2113 vfs_unbusy(mp, p);
2114 }
2115 simple_unlock(&mountlist_slock);
2116
2117 *sizep = bp - where;
2118 return (0);
2119}
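#if 0
	/*
	 * Usage sketch (user-level caller, not part of this file): the
	 * traditional consumer reaches this routine via the KERN_VNODE
	 * sysctl.  A first call with a NULL buffer returns the size
	 * estimate computed above; the records then read back as a vnode
	 * address followed by a copy of the vnode, matching the copyout
	 * pair in the loop.
	 */
	int mib[2] = { CTL_KERN, KERN_VNODE };
	size_t len;
	char *buf, *bp;

	if (sysctl(mib, 2, NULL, &len, NULL, 0) == 0 &&
	    (buf = malloc(len)) != NULL &&
	    sysctl(mib, 2, buf, &len, NULL, 0) == 0) {
		for (bp = buf; bp < buf + len;
		    bp += sizeof(struct vnode *) + sizeof(struct vnode)) {
			struct vnode *kaddr = *(struct vnode **)bp;

			printf("vnode at %p\n", kaddr);
			/* bp + sizeof(struct vnode *) holds the vnode copy */
		}
	}
#endif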
2120
2121/*
2122 * Check to see if a filesystem is mounted on a block device.
2123 */
2124int
2125vfs_mountedon(vp)
2126 struct vnode *vp;
2127{
2128 struct vnode *vq;
2129 int error = 0;
2130
2131 if (vp->v_specflags & SI_MOUNTEDON)
2132 return (EBUSY);
2133 if (vp->v_flag & VALIASED) {
2134 simple_lock(&spechash_slock);
2135 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2136 if (vq->v_rdev != vp->v_rdev ||
2137 vq->v_type != vp->v_type)
2138 continue;
2139 if (vq->v_specflags & SI_MOUNTEDON) {
2140 error = EBUSY;
2141 break;
2142 }
2143 }
2144 simple_unlock(&spechash_slock);
2145 }
2146 return (error);
2147}
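#if 0
	/*
	 * Usage sketch: a mount path would normally reject a device that
	 * already carries a mounted filesystem; "devvp" is a hypothetical
	 * block-device vnode and "error" a local int.
	 */
	if ((error = vfs_mountedon(devvp)))
		return (error);
#endif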
2148
2149/*
2150 * Unmount all filesystems. The list is traversed in reverse order
2151 * of mounting to avoid dependencies.
2152 */
0b4e3aa0 2153__private_extern__ void
1c79356b 2154vfs_unmountall()
2155{
2156 struct mount *mp, *nmp;
0b4e3aa0 2157 struct proc *p = current_proc();
1c79356b 2158
2159 /*
2160 * Since this only runs when rebooting, it is not interlocked.
2161 */
2162 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2163 nmp = mp->mnt_list.cqe_prev;
2164 (void) dounmount(mp, MNT_FORCE, p);
2165 }
2166}
2167
2168/*
2169 * Build hash lists of net addresses and hang them off the mount point.
0b4e3aa0 2170 * Called by vfs_export() to set up the lists of export addresses.
1c79356b 2171 */
2172static int
2173vfs_hang_addrlist(mp, nep, argp)
2174 struct mount *mp;
2175 struct netexport *nep;
2176 struct export_args *argp;
2177{
2178 register struct netcred *np;
2179 register struct radix_node_head *rnh;
2180 register int i;
2181 struct radix_node *rn;
2182 struct sockaddr *saddr, *smask = 0;
2183 struct domain *dom;
2184 int error;
2185
2186 if (argp->ex_addrlen == 0) {
2187 if (mp->mnt_flag & MNT_DEFEXPORTED)
2188 return (EPERM);
2189 np = &nep->ne_defexported;
2190 np->netc_exflags = argp->ex_flags;
2191 np->netc_anon = argp->ex_anon;
2192 np->netc_anon.cr_ref = 1;
2193 mp->mnt_flag |= MNT_DEFEXPORTED;
2194 return (0);
2195 }
2196 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
2197 MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK);
2198 bzero((caddr_t)np, i);
2199 saddr = (struct sockaddr *)(np + 1);
2200 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
2201 goto out;
2202 if (saddr->sa_len > argp->ex_addrlen)
2203 saddr->sa_len = argp->ex_addrlen;
2204 if (argp->ex_masklen) {
2205 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
2206 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
2207 if (error)
2208 goto out;
2209 if (smask->sa_len > argp->ex_masklen)
2210 smask->sa_len = argp->ex_masklen;
2211 }
2212 i = saddr->sa_family;
2213 if ((rnh = nep->ne_rtable[i]) == 0) {
2214 /*
2215 * Seems silly to initialize every AF when most are not
2216 * used, do so on demand here
2217 */
2218 for (dom = domains; dom; dom = dom->dom_next)
2219 if (dom->dom_family == i && dom->dom_rtattach) {
2220 dom->dom_rtattach((void **)&nep->ne_rtable[i],
2221 dom->dom_rtoffset);
2222 break;
2223 }
2224 if ((rnh = nep->ne_rtable[i]) == 0) {
2225 error = ENOBUFS;
2226 goto out;
2227 }
2228 }
2229 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
2230 np->netc_rnodes);
2231 if (rn == 0) {
2232 /*
2233 * One of the reasons that rnh_addaddr may fail is that
2234 * the entry already exists. To check for this case, we
2235 * look up the entry to see if it is there. If so, we
2236 * do not need to make a new entry but do return success.
2237 */
2238 _FREE(np, M_NETADDR);
2239 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
2240 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
2241 ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
2242 !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
2243 (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
2244 return (0);
2245 return (EPERM);
2246 }
2247 np->netc_exflags = argp->ex_flags;
2248 np->netc_anon = argp->ex_anon;
2249 np->netc_anon.cr_ref = 1;
2250 return (0);
2251out:
2252 _FREE(np, M_NETADDR);
2253 return (error);
2254}
2255
2256/* ARGSUSED */
2257static int
2258vfs_free_netcred(rn, w)
2259 struct radix_node *rn;
2260 caddr_t w;
2261{
2262 register struct radix_node_head *rnh = (struct radix_node_head *)w;
2263
2264 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
2265 _FREE((caddr_t)rn, M_NETADDR);
2266 return (0);
2267}
2268
2269/*
2270 * Free the net address hash lists that are hanging off the mount points.
2271 */
2272static void
2273vfs_free_addrlist(nep)
2274 struct netexport *nep;
2275{
2276 register int i;
2277 register struct radix_node_head *rnh;
2278
2279 for (i = 0; i <= AF_MAX; i++)
2280 if (rnh = nep->ne_rtable[i]) {
2281 (*rnh->rnh_walktree)(rnh, vfs_free_netcred,
2282 (caddr_t)rnh);
2283 _FREE((caddr_t)rnh, M_RTABLE);
2284 nep->ne_rtable[i] = 0;
2285 }
2286}
2287
2288int
2289vfs_export(mp, nep, argp)
2290 struct mount *mp;
2291 struct netexport *nep;
2292 struct export_args *argp;
2293{
2294 int error;
2295
2296 if (argp->ex_flags & MNT_DELEXPORT) {
2297 vfs_free_addrlist(nep);
2298 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
2299 }
2300 if (argp->ex_flags & MNT_EXPORTED) {
2301 if (error = vfs_hang_addrlist(mp, nep, argp))
2302 return (error);
2303 mp->mnt_flag |= MNT_EXPORTED;
2304 }
2305 return (0);
2306}
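#if 0
	/*
	 * Usage sketch: a filesystem's mount-update path hands its
	 * per-mount netexport and the user-supplied export_args to
	 * vfs_export(); "ump->um_export" and "args.export" are
	 * hypothetical names for those structures.
	 */
	if ((error = vfs_export(mp, &ump->um_export, &args.export)))
		return (error);
#endif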
2307
2308struct netcred *
2309vfs_export_lookup(mp, nep, nam)
2310 register struct mount *mp;
2311 struct netexport *nep;
2312 struct mbuf *nam;
2313{
2314 register struct netcred *np;
2315 register struct radix_node_head *rnh;
2316 struct sockaddr *saddr;
2317
2318 np = NULL;
2319 if (mp->mnt_flag & MNT_EXPORTED) {
2320 /*
2321 * Lookup in the export list first.
2322 */
2323 if (nam != NULL) {
2324 saddr = mtod(nam, struct sockaddr *);
2325 rnh = nep->ne_rtable[saddr->sa_family];
2326 if (rnh != NULL) {
2327 np = (struct netcred *)
2328 (*rnh->rnh_matchaddr)((caddr_t)saddr,
2329 rnh);
2330 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
2331 np = NULL;
2332 }
2333 }
2334 /*
2335 * If no address match, use the default if it exists.
2336 */
2337 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
2338 np = &nep->ne_defexported;
2339 }
2340 return (np);
2341}
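#if 0
	/*
	 * Usage sketch: an NFS request path maps the client address held
	 * in the mbuf "nam" to export credentials, rejecting the request
	 * when no export entry matches; "np", "error" and "cred" are
	 * hypothetical locals.
	 */
	np = vfs_export_lookup(mp, nep, nam);
	if (np == NULL)
		error = EACCES;
	else
		cred = &np->netc_anon;
#endif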
2342
2343/*
2344 * try to reclaim vnodes from the memory
2345 * object cache
2346 */
0b4e3aa0 2347static int
1c79356b 2348vm_object_cache_reclaim(int count)
2349{
2350 int cnt;
2351 void vnode_pager_release_from_cache(int *);
2352
2353 /* attempt to reclaim vnodes from VM object cache */
2354 cnt = count;
2355 vnode_pager_release_from_cache(&cnt);
2356 return(cnt);
2357}
2358
2359/*
2360 * Release memory object reference held by inactive vnodes
2361 * and then try to reclaim some vnodes from the memory
2362 * object cache
2363 */
0b4e3aa0 2364static int
1c79356b 2365vnreclaim(int count)
2366{
2367 int cnt, i, loopcnt;
2368 void *obj;
2369 struct vnode *vp;
2370 int err;
2371 struct proc *p;
0b4e3aa0 2372 kern_return_t kret;
1c79356b 2373
2374 i = 0;
2375 loopcnt = 0;
2376
2377 /* Try to release "count" vnodes from the inactive list */
2378restart:
2379 if (++loopcnt > inactivevnodes) {
2380 /*
2381 * I did my best trying to reclaim the vnodes.
2382 * Do not try any more as that would only lead to
2383 * long latencies. Also in the worst case
2384 * this can get totally CPU bound.
2385 * Just fall though and attempt a reclaim of VM
2386 * object cache
2387 */
2388 goto out;
2389 }
2390
2391 simple_lock(&vnode_free_list_slock);
2392 for (vp = TAILQ_FIRST(&vnode_inactive_list);
2393 (vp != NULLVP) && (i < count);
2394 vp = TAILQ_NEXT(vp, v_freelist)) {
0b4e3aa0 2395
2396 if (!simple_lock_try(&vp->v_interlock))
2397 continue;
1c79356b 2398
0b4e3aa0 2399 if (vp->v_usecount != 1)
2400 panic("vnreclaim: v_usecount");
1c79356b 2401
0b4e3aa0 2402 if(!UBCINFOEXISTS(vp)) {
2403 if (vp->v_type == VBAD) {
2404 VREMINACTIVE("vnreclaim", vp);
1c79356b 2405 simple_unlock(&vp->v_interlock);
0b4e3aa0 2406 continue;
2407 } else
2408 panic("non UBC vnode on inactive list");
2409 /* Should not reach here */
2410 }
1c79356b 2411
0b4e3aa0 2412 /* If vnode is already being reclaimed, wait */
2413 if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) {
2414 vp->v_flag |= VXWANT;
2415 simple_unlock(&vp->v_interlock);
1c79356b 2416 simple_unlock(&vnode_free_list_slock);
0b4e3aa0 2417 (void)tsleep((caddr_t)vp, PINOD, "vocr", 0);
2418 goto restart;
2419 }
1c79356b 2420
0b4e3aa0 2421 VREMINACTIVE("vnreclaim", vp);
2422 simple_unlock(&vnode_free_list_slock);
1c79356b 2423
0b4e3aa0 2424 if (ubc_issetflags(vp, UI_WASMAPPED)) {
2425 /*
2426 * We should not reclaim as it is likely
2427 * to be in use. Let it die a natural death.
2428 * Release the UBC reference if one exists
2429 * and put it back at the tail.
2430 */
2431 simple_unlock(&vp->v_interlock);
2432 if (ubc_release_named(vp)) {
2433 if (UBCINFOEXISTS(vp)) {
1c79356b 2434 simple_lock(&vp->v_interlock);
0b4e3aa0 2435 if (vp->v_usecount == 1 && !VONLIST(vp))
2436 vinactive(vp);
1c79356b 2437 simple_unlock(&vp->v_interlock);
1c79356b 2438 }
0b4e3aa0 2439 } else {
2440 simple_lock(&vp->v_interlock);
2441 vinactive(vp);
1c79356b 2442 simple_unlock(&vp->v_interlock);
0b4e3aa0
A
2443 }
2444 } else {
2445 int didhold;
1c79356b 2446
0b4e3aa0 2447 VORECLAIM_ENABLE(vp);
1c79356b 2448
0b4e3aa0 2449 /*
2450 * scrub the dirty pages and invalidate the buffers
2451 */
2452 p = current_proc();
2453 err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p);
2454 if (err) {
2455 /* cannot reclaim */
1c79356b 2456 simple_lock(&vp->v_interlock);
0b4e3aa0 2457 vinactive(vp);
1c79356b 2458 VORECLAIM_DISABLE(vp);
2459 i++;
2460 simple_unlock(&vp->v_interlock);
0b4e3aa0 2461 goto restart;
1c79356b 2462 }
0b4e3aa0 2463
2464 /* keep the vnode alive so we can kill it */
2465 simple_lock(&vp->v_interlock);
2466 if(vp->v_usecount != 1)
2467 panic("VOCR: usecount race");
2468 vp->v_usecount++;
2469 simple_unlock(&vp->v_interlock);
2470
2471 /* clean up the state in VM without invalidating */
2472 didhold = ubc_hold(vp);
2473 if (didhold)
2474 (void)ubc_clean(vp, 0);
2475
2476 /* flush and invalidate buffers associated with the vnode */
2477 if (vp->v_tag == VT_NFS)
2478 nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
2479 else
2480 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
2481
2482 /*
2483 * Note: for the v_usecount == 2 case, VOP_INACTIVE
2484 * has not yet been called. Call it now while vp is
2485 * still locked, it will also release the lock.
2486 */
2487 if (vp->v_usecount == 2)
2488 VOP_INACTIVE(vp, p);
2489 else
2490 VOP_UNLOCK(vp, 0, p);
2491
2492 if (didhold)
2493 ubc_rele(vp);
2494
2495 /*
2496 * destroy the ubc named reference.
2497 * If we can't because it is held for I/Os
2498 * in progress, just put it back on the inactive
2499 * list and move on. Otherwise, the paging reference
2500 * is toast (and so is this vnode?).
2501 */
2502 if (ubc_destroy_named(vp)) {
2503 i++;
2504 }
2505 simple_lock(&vp->v_interlock);
2506 VORECLAIM_DISABLE(vp);
2507 simple_unlock(&vp->v_interlock);
2508 vrele(vp); /* release extra use we added here */
1c79356b 2509 }
0b4e3aa0 2510 /* inactive list lock was released, must restart */
2511 goto restart;
1c79356b 2512 }
2513 simple_unlock(&vnode_free_list_slock);
2514
2515 vnode_reclaim_tried += i;
2516out:
2517 i = vm_object_cache_reclaim(count);
2518 vnode_objects_reclaimed += i;
2519
2520 return(i);
2521}
2522
2523/*
2524 * This routine is called from vnode_pager_no_senders()
2525 * which in turn can be called with the vnode locked by vnode_uncache().
2526 * But it could also get called as a result of vm_object_cache_trim().
2527 * In that case lock state is unknown.
2528 * AGE the vnode so that it gets recycled quickly.
2529 * Check lock status to decide whether to call vput() or vrele().
2530 */
0b4e3aa0 2531__private_extern__ void
1c79356b 2532vnode_pager_vrele(struct vnode *vp)
2533{
2534
2535 boolean_t funnel_state;
2536 int isvnreclaim = 1;
2537
2538 if (vp == (struct vnode *) NULL)
2539 panic("vnode_pager_vrele: null vp");
2540
2541 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2542
2543 /* Mark the vnode to be recycled */
2544 vagevp(vp);
2545
2546 simple_lock(&vp->v_interlock);
2547 /*
2548 * If a vgone (or vclean) is already in progress,
2549 * do not bother with the ubc_info cleanup.
2550 * Let the vclean deal with it.
2551 */
2552 if (vp->v_flag & VXLOCK) {
2553 CLR(vp->v_flag, VTERMINATE);
2554 if (ISSET(vp->v_flag, VTERMWANT)) {
2555 CLR(vp->v_flag, VTERMWANT);
2556 wakeup((caddr_t)&vp->v_ubcinfo);
2557 }
2558 simple_unlock(&vp->v_interlock);
2559 vrele(vp);
2560 (void) thread_funnel_set(kernel_flock, funnel_state);
2561 return;
2562 }
2563
2564 /* It's dead, Jim! */
2565 if (!ISSET(vp->v_flag, VORECLAIM)) {
2566 /*
2567 * called as a result of eviction of the memory
2568 * object from the memory object cache
2569 */
2570 isvnreclaim = 0;
2571
2572 /* So serialize vnode operations */
2573 VORECLAIM_ENABLE(vp);
2574 }
2575 if (!ISSET(vp->v_flag, VTERMINATE))
2576 SET(vp->v_flag, VTERMINATE);
2577 if (UBCINFOEXISTS(vp)) {
0b4e3aa0 2578 struct ubc_info *uip = vp->v_ubcinfo;
2579
1c79356b 2580 if (ubc_issetflags(vp, UI_WASMAPPED))
2581 SET(vp->v_flag, VWASMAPPED);
2582
1c79356b 2583 vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
0b4e3aa0 2584 simple_unlock(&vp->v_interlock);
2585 ubc_info_deallocate(uip);
1c79356b 2586 } else {
2587 if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL)
2588 && ((vp)->v_ubcinfo != UBC_NOINFO)) {
0b4e3aa0 2589 struct ubc_info *uip = vp->v_ubcinfo;
2590
1c79356b 2591 vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */
0b4e3aa0 2592 simple_unlock(&vp->v_interlock);
2593 ubc_info_deallocate(uip);
1c79356b 2594 } else {
2595 simple_unlock(&vp->v_interlock);
2596 }
2597 }
2598
2599 CLR(vp->v_flag, VTERMINATE);
2600
2601 if (vp->v_type != VBAD){
2602 vgone(vp); /* revoke the vnode */
2603 vrele(vp); /* and drop the reference */
2604 } else
2605 vrele(vp);
2606
2607 if (ISSET(vp->v_flag, VTERMWANT)) {
2608 CLR(vp->v_flag, VTERMWANT);
2609 wakeup((caddr_t)&vp->v_ubcinfo);
2610 }
2611 if (!isvnreclaim)
2612 VORECLAIM_DISABLE(vp);
2613 (void) thread_funnel_set(kernel_flock, funnel_state);
2614 return;
2615}
2616
2617
2618#if DIAGNOSTIC
2619int walk_vnodes_debug=0;
2620
2621void
2622walk_allvnodes()
2623{
0b4e3aa0 2624 struct proc *p = current_proc();
1c79356b 2625 struct mount *mp, *nmp;
2626 struct vnode *vp;
2627 int cnt = 0;
2628
2629 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2630 for (vp = mp->mnt_vnodelist.lh_first;
2631 vp != NULL;
2632 vp = vp->v_mntvnodes.le_next) {
2633 if (vp->v_usecount < 0){
2634 if(walk_vnodes_debug) {
2635 printf("vp is %x\n",vp);
2636 }
2637 }
2638 }
2639 nmp = mp->mnt_list.cqe_next;
2640 }
2641 for (cnt = 0, vp = vnode_free_list.tqh_first;
2642 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
2643 if ((vp->v_usecount < 0) && walk_vnodes_debug) {
2644 if(walk_vnodes_debug) {
2645 printf("vp is %x\n",vp);
2646 }
2647 }
2648 }
2649 printf("%d - free\n", cnt);
2650
2651 for (cnt = 0, vp = vnode_inactive_list.tqh_first;
2652 vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) {
2653 if ((vp->v_usecount < 0) && walk_vnodes_debug) {
2654 if(walk_vnodes_debug) {
2655 printf("vp is %x\n",vp);
2656 }
2657 }
2658 }
2659 printf("%d - inactive\n", cnt);
2660}
2661#endif /* DIAGNOSTIC */
0b4e3aa0 2662
2663void
2664vfs_io_attributes(vp, flags, iosize, vectors)
2665 struct vnode *vp;
2666 int flags; /* B_READ or B_WRITE */
2667 int *iosize;
2668 int *vectors;
2669{
2670 struct mount *mp;
2671
2672 /* start with "reasonable" defaults */
2673 *iosize = MAXPHYS;
2674 *vectors = 32;
2675
2676 mp = vp->v_mount;
2677 if (mp != NULL) {
2678 switch (flags) {
2679 case B_READ:
2680 *iosize = mp->mnt_maxreadcnt;
2681 *vectors = mp->mnt_segreadcnt;
2682 break;
2683 case B_WRITE:
2684 *iosize = mp->mnt_maxwritecnt;
2685 *vectors = mp->mnt_segwritecnt;
2686 break;
2687 default:
2688 break;
2689 }
2690 }
2691
2692 return;
2693}
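#if 0
	/*
	 * Usage sketch: an I/O path can cap a transfer to the limits
	 * published here; "xfersize" is a hypothetical request size in
	 * bytes.
	 */
	int maxio, vectors;

	vfs_io_attributes(vp, B_READ, &maxio, &vectors);
	if (xfersize > maxio)
		xfersize = maxio;
#endif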
2694
2695#include <dev/disk.h>
2696
2697int
2698vfs_init_io_attributes(devvp, mp)
2699 struct vnode *devvp;
2700 struct mount *mp;
2701{
2702 int error;
2703 off_t readblockcnt;
2704 off_t writeblockcnt;
2705 off_t readsegcnt;
2706 off_t writesegcnt;
2707 u_long blksize;
2708
2709 u_int64_t temp;
2710
2711 struct proc *p = current_proc();
2712 struct ucred *cred = p->p_ucred;
2713
2714 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
2715 (caddr_t)&readblockcnt, 0, cred, p)))
2716 return (error);
2717
2718 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
2719 (caddr_t)&writeblockcnt, 0, cred, p)))
2720 return (error);
2721
2722 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
2723 (caddr_t)&readsegcnt, 0, cred, p)))
2724 return (error);
2725
2726 if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
2727 (caddr_t)&writesegcnt, 0, cred, p)))
2728 return (error);
2729
2730 if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
2731 (caddr_t)&blksize, 0, cred, p)))
2732 return (error);
2733
2734 temp = readblockcnt * blksize;
2735 temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2736 mp->mnt_maxreadcnt = (u_int32_t)temp;
2737
2738 temp = writeblockcnt * blksize;
2739 temp = (temp > UINT32_MAX) ? (UINT32_MAX / blksize) * blksize : temp;
2740 mp->mnt_maxwritecnt = (u_int32_t)temp;
2741
2742 temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
2743 mp->mnt_segreadcnt = (u_int16_t)temp;
2744
2745 temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
2746 mp->mnt_segwritecnt = (u_int16_t)temp;
2747
2748#if 0
2749 printf("--- IO attributes for mount point 0x%08x ---\n", mp);
2750 printf("\tmnt_maxreadcnt = 0x%x", mp->mnt_maxreadcnt);
2751 printf("\tmnt_maxwritecnt = 0x%x\n", mp->mnt_maxwritecnt);
2752 printf("\tmnt_segreadcnt = 0x%x", mp->mnt_segreadcnt);
2753 printf("\tmnt_segwritecnt = 0x%x\n", mp->mnt_segwritecnt);
2754#endif /* 0 */
2755
2756 return (error);
2757}
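/*
 * Worked example of the clamping above (illustrative figures only): with
 * a 512-byte block size and a device reporting 2048 blocks per read,
 * mnt_maxreadcnt becomes 2048 * 512 = 1048576 bytes (1 MB).  A product
 * that would overflow 32 bits is instead clamped to the largest multiple
 * of the block size that fits in UINT32_MAX, and the segment counts are
 * clamped to UINT16_MAX.
 */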
2758