2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
30 * Copyright (c) 1989, 1993, 1995
31 * The Regents of the University of California. All rights reserved.
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95
64 #include <sys/param.h>
65 #include <sys/proc_internal.h>
66 #include <sys/kauth.h>
67 #include <sys/systm.h>
68 #include <sys/kernel.h>
70 #include <sys/buf_internal.h>
71 #include <sys/mount_internal.h>
72 #include <sys/namei.h>
73 #include <sys/vnode_internal.h>
75 #include <sys/errno.h>
76 #include <sys/ioctl.h>
79 #include <sys/malloc.h>
81 #include <sys/uio_internal.h>
82 #include <sys/resource.h>
83 #include <miscfs/specfs/specdev.h>
84 #include <vfs/vfs_support.h>
86 #include <sys/kdebug.h>
/* XXX following three prototypes should be in a header file somewhere */
extern int   isdisk(dev_t dev, int type);
extern dev_t chrtoblk(dev_t dev);
extern int   iskmemdev(dev_t dev);
93 struct vnode
*speclisth
[SPECHSZ
];
/* symbolic sleep message strings for devices */
char devopn[]  = "devopn";
char devio[]   = "devio";
char devwait[] = "devwait";
char devin[]   = "devin";
char devout[]  = "devout";
char devioc[]  = "devioc";
char devcls[]  = "devcls";
/* Generic function-pointer cast applied to every handler stored in the
 * spec_vnodeop_entries table below. */
104 #define VOPFUNC int (*)(void *)
106 int (**spec_vnodeop_p
)(void *);
107 struct vnodeopv_entry_desc spec_vnodeop_entries
[] = {
108 { &vnop_default_desc
, (VOPFUNC
)vn_default_error
},
109 { &vnop_lookup_desc
, (VOPFUNC
)spec_lookup
}, /* lookup */
110 { &vnop_create_desc
, (VOPFUNC
)err_create
}, /* create */
111 { &vnop_mknod_desc
, (VOPFUNC
)err_mknod
}, /* mknod */
112 { &vnop_open_desc
, (VOPFUNC
)spec_open
}, /* open */
113 { &vnop_close_desc
, (VOPFUNC
)spec_close
}, /* close */
114 { &vnop_access_desc
, (VOPFUNC
)spec_access
}, /* access */
115 { &vnop_getattr_desc
, (VOPFUNC
)spec_getattr
}, /* getattr */
116 { &vnop_setattr_desc
, (VOPFUNC
)spec_setattr
}, /* setattr */
117 { &vnop_read_desc
, (VOPFUNC
)spec_read
}, /* read */
118 { &vnop_write_desc
, (VOPFUNC
)spec_write
}, /* write */
119 { &vnop_ioctl_desc
, (VOPFUNC
)spec_ioctl
}, /* ioctl */
120 { &vnop_select_desc
, (VOPFUNC
)spec_select
}, /* select */
121 { &vnop_revoke_desc
, (VOPFUNC
)nop_revoke
}, /* revoke */
122 { &vnop_mmap_desc
, (VOPFUNC
)err_mmap
}, /* mmap */
123 { &vnop_fsync_desc
, (VOPFUNC
)spec_fsync
}, /* fsync */
124 { &vnop_remove_desc
, (VOPFUNC
)err_remove
}, /* remove */
125 { &vnop_link_desc
, (VOPFUNC
)err_link
}, /* link */
126 { &vnop_rename_desc
, (VOPFUNC
)err_rename
}, /* rename */
127 { &vnop_mkdir_desc
, (VOPFUNC
)err_mkdir
}, /* mkdir */
128 { &vnop_rmdir_desc
, (VOPFUNC
)err_rmdir
}, /* rmdir */
129 { &vnop_symlink_desc
, (VOPFUNC
)err_symlink
}, /* symlink */
130 { &vnop_readdir_desc
, (VOPFUNC
)err_readdir
}, /* readdir */
131 { &vnop_readlink_desc
, (VOPFUNC
)err_readlink
}, /* readlink */
132 { &vnop_inactive_desc
, (VOPFUNC
)nop_inactive
}, /* inactive */
133 { &vnop_reclaim_desc
, (VOPFUNC
)nop_reclaim
}, /* reclaim */
134 { &vnop_strategy_desc
, (VOPFUNC
)spec_strategy
}, /* strategy */
135 { &vnop_pathconf_desc
, (VOPFUNC
)spec_pathconf
}, /* pathconf */
136 { &vnop_advlock_desc
, (VOPFUNC
)err_advlock
}, /* advlock */
137 { &vnop_bwrite_desc
, (VOPFUNC
)spec_bwrite
}, /* bwrite */
138 { &vnop_pagein_desc
, (VOPFUNC
)err_pagein
}, /* Pagein */
139 { &vnop_pageout_desc
, (VOPFUNC
)err_pageout
}, /* Pageout */
140 { &vnop_copyfile_desc
, (VOPFUNC
)err_copyfile
}, /* Copyfile */
141 { &vnop_blktooff_desc
, (VOPFUNC
)spec_blktooff
}, /* blktooff */
142 { &vnop_offtoblk_desc
, (VOPFUNC
)spec_offtoblk
}, /* offtoblk */
143 { &vnop_blockmap_desc
, (VOPFUNC
)spec_blockmap
}, /* blockmap */
144 { (struct vnodeop_desc
*)NULL
, (int(*)())NULL
}
146 struct vnodeopv_desc spec_vnodeop_opv_desc
=
147 { &spec_vnodeop_p
, spec_vnodeop_entries
};
150 static void set_blocksize(vnode_t
, dev_t
);
/*
 * spec_lookup: lookup on a special-file vnode; per the fragment of the
 * original comment below, it always fails.
 * NOTE(review): this listing is mangled — statements are split across lines,
 * the leading numbers are line-number residue, and the function body is
 * missing from this view; do not edit the code without the full source.
 */
154 * Trivial lookup routine that always fails.
157 spec_lookup(struct vnop_lookup_args
*ap
)
/*
 * set_blocksize: derive vp->v_specsize from the block device's d_psize
 * routine.  Falls back to DEV_BSIZE when the major number is out of range,
 * bdevsw[maj].d_psize is NULL, or the routine reports a non-positive size.
 * NOTE(review): listing is mangled (tokens split across lines, interior
 * lines missing, leading numbers are line-number residue) — code kept
 * byte-identical.
 */
165 set_blocksize(struct vnode
*vp
, dev_t dev
)
/* Only consult d_psize for a valid block-device major with a size routine. */
170 if ((major(dev
) < nblkdev
) && (size
= bdevsw
[major(dev
)].d_psize
)) {
171 rsize
= (*size
)(dev
);
172 if (rsize
<= 0) /* did size fail? */
173 vp
->v_specsize
= DEV_BSIZE
;
175 vp
->v_specsize
= rsize
;
/* No usable d_psize: default to DEV_BSIZE. */
178 vp
->v_specsize
= DEV_BSIZE
;
/*
 * set_fsblocksize: for a VBLK vnode, validate the device major number and
 * then recompute v_specsize via set_blocksize().  Non-VBLK vnodes and
 * out-of-range majors are left untouched (early-out lines missing here).
 * NOTE(review): listing is mangled; interior lines are missing and the
 * leading numbers are line-number residue — code kept byte-identical.
 */
182 set_fsblocksize(struct vnode
*vp
)
185 if (vp
->v_type
== VBLK
) {
186 dev_t dev
= (dev_t
)vp
->v_rdev
;
187 int maj
= major(dev
);
/* Reject invalid block-device majors before touching bdevsw. */
189 if ((u_int
)maj
>= (u_int
)nblkdev
)
193 set_blocksize(vp
, dev
);
/*
 * spec_open: open a special (character or block) file.
 * Enforces MNT_NODEV, the securelevel write restrictions on disk devices,
 * refuses opening mounted block devices, then calls the device switch's
 * d_open.  For block devices it additionally probes the media block size
 * and count via VNOP_IOCTL and caches v_specsize / v_specdevsize.
 * NOTE(review): listing is mangled — statements are split across lines,
 * interior lines (returns, braces, declarations) are missing, and the
 * leading numbers are line-number residue — code kept byte-identical.
 */
201 * Open a special file.
204 spec_open(struct vnop_open_args
*ap
)
206 struct proc
*p
= vfs_context_proc(ap
->a_context
);
207 kauth_cred_t cred
= vfs_context_ucred(ap
->a_context
);
208 struct vnode
*vp
= ap
->a_vp
;
209 dev_t bdev
, dev
= (dev_t
)vp
->v_rdev
;
210 int maj
= major(dev
);
214 * Don't allow open if fs is mounted -nodev.
216 if (vp
->v_mount
&& (vp
->v_mount
->mnt_flag
& MNT_NODEV
))
219 switch (vp
->v_type
) {
/* Character-device path: validate major, then securelevel write checks. */
222 if ((u_int
)maj
>= (u_int
)nchrdev
)
224 if (cred
!= FSCRED
&& (ap
->a_mode
& FWRITE
)) {
226 * When running in very secure mode, do not allow
227 * opens for writing of any disk character devices.
229 if (securelevel
>= 2 && isdisk(dev
, VCHR
))
232 * When running in secure mode, do not allow opens
233 * for writing of /dev/mem, /dev/kmem, or character
234 * devices whose corresponding block devices are
237 if (securelevel
>= 1) {
238 if ((bdev
= chrtoblk(dev
)) != NODEV
&& check_mountedon(bdev
, VBLK
, &error
))
/* Mark tty character devices so the vnode layer knows. */
244 if (cdevsw
[maj
].d_type
== D_TTY
) {
246 vp
->v_flag
|= VISTTY
;
249 error
= (*cdevsw
[maj
].d_open
)(dev
, ap
->a_mode
, S_IFCHR
, p
);
/* Block-device path. */
253 if ((u_int
)maj
>= (u_int
)nblkdev
)
256 * When running in very secure mode, do not allow
257 * opens for writing of any disk block devices.
259 if (securelevel
>= 2 && cred
!= FSCRED
&&
260 (ap
->a_mode
& FWRITE
) && bdevsw
[maj
].d_type
== D_DISK
)
263 * Do not allow opens of block devices that are
266 if ( (error
= vfs_mountedon(vp
)) )
268 error
= (*bdevsw
[maj
].d_open
)(dev
, ap
->a_mode
, S_IFBLK
, p
);
/* Probe media geometry: block size, then count in 512-byte sectors. */
273 u_int32_t size512
= 512;
276 if (!VNOP_IOCTL(vp
, DKIOCGETBLOCKSIZE
, (caddr_t
)&blksize
, 0, ap
->a_context
)) {
277 /* Switch to 512 byte sectors (temporarily) */
279 if (!VNOP_IOCTL(vp
, DKIOCSETBLOCKSIZE
, (caddr_t
)&size512
, FWRITE
, ap
->a_context
)) {
280 /* Get the number of 512 byte physical blocks. */
281 if (!VNOP_IOCTL(vp
, DKIOCGETBLOCKCOUNT
, (caddr_t
)&blkcnt
, 0, ap
->a_context
)) {
285 /* If it doesn't set back, we can't recover */
286 if (VNOP_IOCTL(vp
, DKIOCSETBLOCKSIZE
, (caddr_t
)&blksize
, FWRITE
, ap
->a_context
))
292 set_blocksize(vp
, dev
);
295 * Cache the size in bytes of the block device for later
296 * use by spec_write().
299 vp
->v_specdevsize
= blkcnt
* (u_int64_t
)size512
;
301 vp
->v_specdevsize
= (u_int64_t
)0; /* Default: Can't get */
308 panic("spec_open type");
/*
 * spec_read: read from a special file.
 * VCHR: hand the uio straight to the character driver's d_read.
 * VBLK: block-aligned buffered reads via buf_bread()/buf_breadn(),
 * with one-block read-ahead when the access pattern looks sequential
 * (v_speclastr + bscale == bn), then uiomove() out of the buffer.
 * NOTE(review): listing is mangled — statements split across lines,
 * interior lines missing, leading numbers are line-number residue —
 * code kept byte-identical.
 */
317 spec_read(struct vnop_read_args
*ap
)
319 struct vnode
*vp
= ap
->a_vp
;
320 struct uio
*uio
= ap
->a_uio
;
322 daddr64_t bn
, nextbn
;
/* Sanity checks: must be a read from system space. */
330 if (uio
->uio_rw
!= UIO_READ
)
331 panic("spec_read mode");
332 if (UIO_SEG_IS_USER_SPACE(uio
->uio_segflg
))
333 panic("spec_read proc");
335 if (uio_resid(uio
) == 0)
338 switch (vp
->v_type
) {
341 error
= (*cdevsw
[major(vp
->v_rdev
)].d_read
)
342 (vp
->v_rdev
, uio
, ap
->a_ioflag
);
346 if (uio
->uio_offset
< 0)
351 devBlockSize
= vp
->v_specsize
;
353 if (devBlockSize
> PAGE_SIZE
)
/* Scale I/O up to PAGE_SIZE-sized chunks of device blocks. */
356 bscale
= PAGE_SIZE
/ devBlockSize
;
357 bsize
= bscale
* devBlockSize
;
360 on
= uio
->uio_offset
% bsize
;
362 bn
= (daddr64_t
)((uio
->uio_offset
/ devBlockSize
) &~ (bscale
- 1));
/* Sequential access detected: issue read-ahead for the next chunk. */
364 if (vp
->v_speclastr
+ bscale
== bn
) {
365 nextbn
= bn
+ bscale
;
366 error
= buf_breadn(vp
, bn
, (int)bsize
, &nextbn
,
367 (int *)&bsize
, 1, NOCRED
, &bp
);
369 error
= buf_bread(vp
, bn
, (int)bsize
, NOCRED
, &bp
);
372 vp
->v_speclastr
= bn
;
375 n
= bsize
- buf_resid(bp
);
376 if ((on
> n
) || error
) {
382 // LP64todo - fix this!
383 n
= min((unsigned)(n
- on
), uio_resid(uio
));
385 error
= uiomove((char *)0 + buf_dataptr(bp
) + on
, n
, uio
);
389 } while (error
== 0 && uio_resid(uio
) > 0 && n
!= 0);
393 panic("spec_read type");
/*
 * spec_write: write to a special file.
 * VCHR: hand the uio straight to the character driver's d_write.
 * VBLK: buffered block writes — read-modify-write via buf_bread() for
 * partial blocks, buf_getblk() when overwriting a whole block, then
 * uiomove() in and flush with buf_bwrite()/buf_bawrite()/buf_bdwrite()
 * depending on IO_SYNC and whether the block is complete.
 * NOTE(review): listing is mangled — statements split across lines,
 * interior lines missing, leading numbers are line-number residue —
 * code kept byte-identical.
 */
404 spec_write(struct vnop_write_args
*ap
)
406 struct vnode
*vp
= ap
->a_vp
;
407 struct uio
*uio
= ap
->a_uio
;
410 int bsize
, blkmask
, bscale
;
/* Sanity checks: must be a write from system space. */
419 if (uio
->uio_rw
!= UIO_WRITE
)
420 panic("spec_write mode");
421 if (UIO_SEG_IS_USER_SPACE(uio
->uio_segflg
))
422 panic("spec_write proc");
425 switch (vp
->v_type
) {
428 error
= (*cdevsw
[major(vp
->v_rdev
)].d_write
)
429 (vp
->v_rdev
, uio
, ap
->a_ioflag
);
433 if (uio_resid(uio
) == 0)
435 if (uio
->uio_offset
< 0)
438 io_sync
= (ap
->a_ioflag
& IO_SYNC
);
439 // LP64todo - fix this!
440 io_size
= uio_resid(uio
);
444 devBlockSize
= vp
->v_specsize
;
445 if (devBlockSize
> PAGE_SIZE
)
/* Scale I/O up to PAGE_SIZE-sized chunks of device blocks. */
448 bscale
= PAGE_SIZE
/ devBlockSize
;
449 blkmask
= bscale
- 1;
450 bsize
= bscale
* devBlockSize
;
454 bn
= (daddr64_t
)((uio
->uio_offset
/ devBlockSize
) &~ blkmask
);
455 on
= uio
->uio_offset
% bsize
;
457 // LP64todo - fix this!
458 n
= min((unsigned)(bsize
- on
), uio_resid(uio
));
461 * Use buf_getblk() as an optimization IFF:
463 * 1) We are reading exactly a block on a block
465 * 2) We know the size of the device from spec_open
466 * 3) The read doesn't span the end of the device
468 * Otherwise, we fall back on buf_bread().
471 vp
->v_specdevsize
!= (u_int64_t
)0 &&
472 (uio
->uio_offset
+ (u_int64_t
)n
) > vp
->v_specdevsize
) {
473 /* reduce the size of the read to what is there */
474 n
= (uio
->uio_offset
+ (u_int64_t
)n
) - vp
->v_specdevsize
;
478 bp
= buf_getblk(vp
, bn
, bsize
, 0, 0, BLK_WRITE
);
480 error
= (int)buf_bread(vp
, bn
, bsize
, NOCRED
, &bp
);
482 /* Translate downstream error for upstream, if needed */
484 error
= (int)buf_error(bp
);
489 n
= min(n
, bsize
- buf_resid(bp
));
491 error
= uiomove((char *)0 + buf_dataptr(bp
) + on
, n
, uio
);
/* Synchronous write, full-block async write, or delayed write. */
499 error
= buf_bwrite(bp
);
501 if ((n
+ on
) == bsize
)
502 error
= buf_bawrite(bp
);
504 error
= buf_bdwrite(bp
);
506 } while (error
== 0 && uio_resid(uio
) > 0 && n
!= 0);
510 panic("spec_write type");
/*
 * spec_ioctl: dispatch an ioctl to the character or block device switch.
 * For VBLK, command 0 with data B_TAPE appears to answer a "is this a
 * tape device?" query via bdevsw[].d_type == D_TAPE.
 * NOTE(review): listing is mangled — statements split across lines,
 * interior lines missing, leading numbers are line-number residue —
 * code kept byte-identical.
 */
518 * Device ioctl operation.
521 spec_ioctl(struct vnop_ioctl_args
*ap
)
523 proc_t p
= vfs_context_proc(ap
->a_context
);
524 dev_t dev
= ap
->a_vp
->v_rdev
;
526 switch (ap
->a_vp
->v_type
) {
529 return ((*cdevsw
[major(dev
)].d_ioctl
)(dev
, ap
->a_command
, ap
->a_data
,
533 if (ap
->a_command
== 0 && (unsigned int)ap
->a_data
== B_TAPE
) {
534 if (bdevsw
[major(dev
)].d_type
== D_TAPE
)
539 return ((*bdevsw
[major(dev
)].d_ioctl
)(dev
, ap
->a_command
, ap
->a_data
,
/*
 * spec_select: select/poll on a special file.  Character devices are
 * forwarded to the driver's d_select; the default case returns 1
 * (always ready — marked XXX in the original).
 * NOTE(review): listing is mangled; interior lines missing; leading
 * numbers are line-number residue — code kept byte-identical.
 */
550 spec_select(struct vnop_select_args
*ap
)
552 proc_t p
= vfs_context_proc(ap
->a_context
);
555 switch (ap
->a_vp
->v_type
) {
558 return (1); /* XXX */
561 dev
= ap
->a_vp
->v_rdev
;
562 return (*cdevsw
[major(dev
)].d_select
)(dev
, ap
->a_which
, ap
->a_wql
, p
);
/*
 * spec_fsync_internal: flush dirty buffers for a block-device vnode.
 * Character devices have no buffer cache to flush and are skipped.
 * waitfor == MNT_WAIT makes buf_flushdirtyblks() synchronous.
 * NOTE(review): listing is mangled; interior lines (return statements,
 * braces) are missing; leading numbers are line-number residue — code
 * kept byte-identical.
 */
567 * Synch buffers associated with a block device
570 spec_fsync_internal(vnode_t vp
, int waitfor
, __unused vfs_context_t context
)
572 if (vp
->v_type
== VCHR
)
575 * Flush all dirty buffers associated with a block device.
577 buf_flushdirtyblks(vp
, waitfor
== MNT_WAIT
, 0, "spec_fsync");
/*
 * spec_fsync: VNOP entry point — unpack the args and delegate to
 * spec_fsync_internal().
 * NOTE(review): listing is mangled (return type/braces missing from
 * view); code kept byte-identical.
 */
583 spec_fsync(struct vnop_fsync_args
*ap
)
585 return spec_fsync_internal(ap
->a_vp
, ap
->a_waitfor
, ap
->a_context
);
589 * Just call the device strategy routine
591 extern int hard_throttle_on_root
;
593 extern void throttle_lowpri_io(int *lowpri_window
,mount_t v_mount
);
595 // the low priority process may wait for at most LOWPRI_MAX_DELAY millisecond
596 #define LOWPRI_INITIAL_WINDOW_MSECS 100
597 #define LOWPRI_WINDOW_MSECS_INC 50
598 #define LOWPRI_MAX_WINDOW_MSECS 200
599 #define LOWPRI_MAX_WAITING_MSECS 200
600 #define LOWPRI_SLEEP_INTERVAL 5
602 int lowpri_IO_initial_window_msecs
= LOWPRI_INITIAL_WINDOW_MSECS
;
603 int lowpri_IO_window_msecs_inc
= LOWPRI_WINDOW_MSECS_INC
;
604 int lowpri_max_window_msecs
= LOWPRI_MAX_WINDOW_MSECS
;
605 int lowpri_max_waiting_msecs
= LOWPRI_MAX_WAITING_MSECS
;
607 SYSCTL_INT(_debug
, OID_AUTO
, lowpri_IO_initial_window_msecs
, CTLFLAG_RW
, &lowpri_IO_initial_window_msecs
, LOWPRI_INITIAL_WINDOW_MSECS
, "");
608 SYSCTL_INT(_debug
, OID_AUTO
, lowpri_IO_window_inc
, CTLFLAG_RW
, &lowpri_IO_window_msecs_inc
, LOWPRI_INITIAL_WINDOW_MSECS
, "");
609 SYSCTL_INT(_debug
, OID_AUTO
, lowpri_max_window_msecs
, CTLFLAG_RW
, &lowpri_max_window_msecs
, LOWPRI_INITIAL_WINDOW_MSECS
, "");
610 SYSCTL_INT(_debug
, OID_AUTO
, lowpri_max_waiting_msecs
, CTLFLAG_RW
, &lowpri_max_waiting_msecs
, LOWPRI_INITIAL_WINDOW_MSECS
, "");
/*
 * throttle_lowpri_io: delay a low-priority I/O issuer.  Sleeps in
 * LOWPRI_SLEEP_INTERVAL-ms steps (up to lowpri_max_waiting_msecs total)
 * while the time since the mount's last normal-priority I/O is still
 * inside the *lowpri_window millisecond window.  Brackets the wait with
 * KERNEL_DEBUG start/end events.
 * NOTE(review): listing is mangled — the declaration of `i`, loop exit,
 * and closing braces are missing from view; leading numbers are
 * line-number residue — code kept byte-identical.
 */
612 void throttle_lowpri_io(int *lowpri_window
,mount_t v_mount
)
615 struct timeval last_lowpri_IO_timestamp
,last_normal_IO_timestamp
;
616 struct timeval elapsed
;
617 int lowpri_IO_window_msecs
;
618 struct timeval lowpri_IO_window
;
619 int max_try_num
= lowpri_max_waiting_msecs
/ LOWPRI_SLEEP_INTERVAL
;
621 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 97)) | DBG_FUNC_START
,
622 *lowpri_window
, 0, 0, 0, 0);
624 last_normal_IO_timestamp
= v_mount
->last_normal_IO_timestamp
;
626 for (i
=0; i
<max_try_num
; i
++) {
627 microuptime(&last_lowpri_IO_timestamp
);
/* elapsed = now - time of last normal-priority I/O on this mount. */
629 elapsed
= last_lowpri_IO_timestamp
;
630 timevalsub(&elapsed
, &last_normal_IO_timestamp
);
/* Re-read the window each pass: the caller may grow it concurrently. */
632 lowpri_IO_window_msecs
= *lowpri_window
;
633 lowpri_IO_window
.tv_sec
= lowpri_IO_window_msecs
/ 1000;
634 lowpri_IO_window
.tv_usec
= (lowpri_IO_window_msecs
% 1000) * 1000;
636 if (timevalcmp(&elapsed
, &lowpri_IO_window
, <)) {
637 IOSleep(LOWPRI_SLEEP_INTERVAL
);
643 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 97)) | DBG_FUNC_END
,
644 *lowpri_window
, i
*5, 0, 0, 0);
/*
 * spec_strategy: hand a buf off to the block driver's d_strategy, with
 * bookkeeping first: emit a KERNEL_DEBUG trace, set
 * hard_throttle_on_root for pagein reads against the root device, and —
 * when low-priority I/O throttling is enabled — classify the issuing
 * thread's I/O policy.  Normal-priority I/O timestamps the mount;
 * throttleable I/O grows the thread's uu_lowpri_window so the delay is
 * taken later (at syscall return), not here while locks/busy pages are
 * held.
 * NOTE(review): listing is mangled — many interior lines (declarations,
 * switch labels, braces, the return) are missing from view; leading
 * numbers are line-number residue — code kept byte-identical.
 */
649 spec_strategy(struct vnop_strategy_args
*ap
)
656 bdev
= buf_device(bp
);
657 bflags
= buf_flags(bp
);
/* Pick a trace code from the buf flags (async / pageio / ...). */
664 if (bflags
& B_ASYNC
)
669 else if (bflags
& B_PAGEIO
)
672 KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW
, code
) | DBG_FUNC_NONE
,
673 (unsigned int)bp
, bdev
, (int)buf_blkno(bp
), buf_count(bp
), 0);
/* Pagein read against the root device: flip the global throttle flag. */
675 if (((bflags
& (B_PAGEIO
| B_READ
)) == (B_PAGEIO
| B_READ
)) &&
676 (buf_vnode(bp
)->v_mount
->mnt_kern_flag
& MNTK_ROOTDEV
))
677 hard_throttle_on_root
= 1;
679 if (lowpri_IO_initial_window_msecs
) {
682 int policy
= IOPOL_DEFAULT
;
683 int is_throttleable_io
= 0;
684 int is_passive_io
= 0;
686 ut
= get_bsdthread_info(current_thread());
689 policy
= p
->p_iopol_disk
;
692 // the I/O policy of the thread overrides that of the process
693 // unless the I/O policy of the thread is default
694 if (ut
->uu_iopol_disk
!= IOPOL_DEFAULT
)
695 policy
= ut
->uu_iopol_disk
;
703 is_throttleable_io
= 1;
709 printf("unknown I/O policy %d", policy
);
713 if (!is_throttleable_io
&& ISSET(bflags
, B_PASSIVE
))
716 if (!is_throttleable_io
) {
/* Normal (non-passive) I/O: record its time on the mount. */
717 if (!is_passive_io
&& buf_vnode(bp
)->v_mount
!= NULL
){
718 microuptime(&(buf_vnode(bp
)->v_mount
->last_normal_IO_timestamp
));
722 * I'd really like to do the IOSleep here, but
723 * we may be holding all kinds of filesystem related locks
724 * and the pages for this I/O marked 'busy'...
725 * we don't want to cause a normal task to block on
726 * one of these locks while we're throttling a task marked
727 * for low priority I/O... we'll mark the uthread and
728 * do the delay just before we return from the system
729 * call that triggered this I/O or from vnode_pagein
731 if(buf_vnode(bp
)->v_mount
!= NULL
)
732 ut
->v_mount
= buf_vnode(bp
)->v_mount
;
/* Start or widen the thread's throttle window, clamped at the max. */
733 if (ut
->uu_lowpri_window
== 0) {
734 ut
->uu_lowpri_window
= lowpri_IO_initial_window_msecs
;
736 ut
->uu_lowpri_window
+= lowpri_IO_window_msecs_inc
;
737 if (ut
->uu_lowpri_window
> lowpri_max_window_msecs
)
738 ut
->uu_lowpri_window
= lowpri_max_window_msecs
;
742 (*bdevsw
[major(bdev
)].d_strategy
)(bp
);
/*
 * spec_blockmap: per the fragment of the original comment below, a noop
 * that returns what it is given.
 * NOTE(review): the function body is missing from this mangled listing;
 * leading numbers are line-number residue — code kept byte-identical.
 */
749 * This is a noop, simply returning what one has been given.
752 spec_blockmap(__unused
struct vnop_blockmap_args
*ap
)
/*
 * spec_close: close a special file.
 * VCHR: if this vnode is the closing process' controlling terminal and
 * only the session reference remains besides this descriptor, drop the
 * session's tty references; then select cdevsw d_close.
 * VBLK: fsync and invalidate cached blocks (ordering differs under
 * DEVFS_IMPLEMENTS_LOCKING), then select bdevsw d_close.  Finally the
 * chosen d_close is invoked.
 * NOTE(review): listing is mangled — many interior lines (returns,
 * braces, the `mode` handling) are missing from view; leading numbers
 * are line-number residue — code kept byte-identical.
 */
759 * Device close routine
762 spec_close(struct vnop_close_args
*ap
)
764 struct vnode
*vp
= ap
->a_vp
;
765 dev_t dev
= vp
->v_rdev
;
766 int (*devclose
)(dev_t
, int, int, struct proc
*);
768 int flags
= ap
->a_fflag
;
769 struct proc
*p
= vfs_context_proc(ap
->a_context
);
770 struct session
*sessp
;
772 switch (vp
->v_type
) {
776 * Hack: a tty device that is a controlling terminal
777 * has a reference from the session structure.
778 * We cannot easily tell that a character device is
779 * a controlling terminal, unless it is the closing
780 * process' controlling terminal. In that case,
781 * if the reference count is 2 (this last descriptor
782 * plus the session), release the reference from the session.
784 sessp
= proc_session(p
);
785 if (sessp
!= SESSION_NULL
) {
786 if ((vcount(vp
) == 2) &&
787 (vp
== sessp
->s_ttyvp
)) {
789 sessp
->s_ttyvp
= NULL
;
791 sessp
->s_ttyp
= NULL
;
792 sessp
->s_ttypgrpid
= NO_PID
;
793 session_unlock(sessp
);
799 devclose
= cdevsw
[major(dev
)].d_close
;
802 * close on last reference or on vnode revoke call
804 if ((flags
& IO_REVOKE
) != 0)
811 #ifdef DEVFS_IMPLEMENTS_LOCKING
813 * On last close of a block device (that isn't mounted)
814 * we must invalidate any in core blocks, so that
815 * we can, for instance, change floppy disks.
817 if ((error
= spec_fsync_internal(vp
, MNT_WAIT
, ap
->a_context
)))
820 error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0);
824 * Since every use (buffer, vnode, swap, blockmap)
825 * holds a reference to the vnode, and because we mark
826 * any other vnodes that alias this device, when the
827 * sum of the reference counts on all the aliased
828 * vnodes descends to one, we are on last close.
832 #else /* DEVFS_IMPLEMENTS_LOCKING */
834 * Since every use (buffer, vnode, swap, blockmap)
835 * holds a reference to the vnode, and because we mark
836 * any other vnodes that alias this device, when the
837 * sum of the reference counts on all the aliased
838 * vnodes descends to one, we are on last close.
844 * On last close of a block device (that isn't mounted)
845 * we must invalidate any in core blocks, so that
846 * we can, for instance, change floppy disks.
848 if ((error
= spec_fsync_internal(vp
, MNT_WAIT
, ap
->a_context
)))
851 error
= buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0);
854 #endif /* DEVFS_IMPLEMENTS_LOCKING */
855 devclose
= bdevsw
[major(dev
)].d_close
;
860 panic("spec_close: not special");
864 return ((*devclose
)(dev
, flags
, mode
, p
));
/*
 * spec_pathconf: answer pathconf(2) queries for special files via
 * *ap->a_retval (LINK_MAX, MAX_CANON, MAX_INPUT, PIPE_BUF,
 * _POSIX_CHOWN_RESTRICTED as 200112, _POSIX_VDISABLE).
 * NOTE(review): listing is mangled — most case labels, break/return
 * statements, and the default case are missing from view; leading
 * numbers are line-number residue — code kept byte-identical.
 */
868 * Return POSIX pathconf information applicable to special devices.
871 spec_pathconf(struct vnop_pathconf_args
*ap
)
874 switch (ap
->a_name
) {
876 *ap
->a_retval
= LINK_MAX
;
879 *ap
->a_retval
= MAX_CANON
;
882 *ap
->a_retval
= MAX_INPUT
;
885 *ap
->a_retval
= PIPE_BUF
;
887 case _PC_CHOWN_RESTRICTED
:
888 *ap
->a_retval
= 200112; /* _POSIX_CHOWN_RESTRICTED */
891 *ap
->a_retval
= _POSIX_VDISABLE
;
/*
 * spec_ebadf: stub for operations that are invalid on special devices;
 * the body (presumably returning EBADF, per the name) is missing from
 * this mangled listing — code kept byte-identical.
 */
900 * Special device failed operation
903 spec_ebadf(__unused
void *dummy
)
/*
 * spec_blktooff: map a logical block number to a byte offset.
 * Unimplemented for devices — both visible branches store (off_t)-1
 * into *ap->a_offset, and VBLK additionally logs a message.
 * NOTE(review): listing is mangled — case labels, returns, and braces
 * are missing from view; leading numbers are line-number residue —
 * code kept byte-identical.
 */
909 /* Blktooff derives file offset from logical block number */
911 spec_blktooff(struct vnop_blktooff_args
*ap
)
913 struct vnode
*vp
= ap
->a_vp
;
915 switch (vp
->v_type
) {
917 *ap
->a_offset
= (off_t
)-1; /* failure */
921 printf("spec_blktooff: not implemented for VBLK\n");
922 *ap
->a_offset
= (off_t
)-1; /* failure */
926 panic("spec_blktooff type");
/*
 * spec_offtoblk: map a byte offset to a logical block number — the
 * mirror of spec_blktooff above.  Unimplemented for devices: both
 * visible branches store (daddr64_t)-1 into *ap->a_lblkno, and VBLK
 * additionally logs a message.
 * NOTE(review): listing is mangled and this view ends mid-function —
 * trailing lines are missing; leading numbers are line-number residue —
 * code kept byte-identical.
 */
933 /* Offtoblk derives logical block number from file offset */
935 spec_offtoblk(struct vnop_offtoblk_args
*ap
)
937 struct vnode
*vp
= ap
->a_vp
;
939 switch (vp
->v_type
) {
941 *ap
->a_lblkno
= (daddr64_t
)-1; /* failure */
945 printf("spec_offtoblk: not implemented for VBLK\n");
946 *ap
->a_lblkno
= (daddr64_t
)-1; /* failure */
950 panic("spec_offtoblk type");