/*
 * Source: apple/xnu (xnu-792.6.56), bsd/miscfs/specfs/spec_vnops.c
 */
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
24 /*
25 * Copyright (c) 1989, 1993, 1995
26 * The Regents of the University of California. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. All advertising materials mentioning features or use of this software
37 * must display the following acknowledgement:
38 * This product includes software developed by the University of
39 * California, Berkeley and its contributors.
40 * 4. Neither the name of the University nor the names of its contributors
41 * may be used to endorse or promote products derived from this software
42 * without specific prior written permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95
57 */
58
59 #include <sys/param.h>
60 #include <sys/proc_internal.h>
61 #include <sys/kauth.h>
62 #include <sys/systm.h>
63 #include <sys/kernel.h>
64 #include <sys/conf.h>
65 #include <sys/buf_internal.h>
66 #include <sys/mount_internal.h>
67 #include <sys/namei.h>
68 #include <sys/vnode_internal.h>
69 #include <sys/stat.h>
70 #include <sys/errno.h>
71 #include <sys/ioctl.h>
72 #include <sys/file.h>
73 #include <sys/user.h>
74 #include <sys/malloc.h>
75 #include <sys/disk.h>
76 #include <sys/uio_internal.h>
77 #include <miscfs/specfs/specdev.h>
78 #include <vfs/vfs_support.h>
79
80 #include <sys/kdebug.h>
81
/*
 * Hash chains of active special-device vnodes.
 * NOTE(review): presumably keyed by device number via the SPECHASH
 * macro — confirm against miscfs/specfs/specdev.h.
 */
struct vnode *speclisth[SPECHSZ];

/* symbolic sleep message strings for devices */
char	devopn[] = "devopn";
char	devio[] = "devio";
char	devwait[] = "devwait";
char	devin[] = "devin";
char	devout[] = "devout";
char	devioc[] = "devioc";
char	devcls[] = "devcls";
92
/* Cast helper for filling the vnode-operation descriptor table. */
#define VOPFUNC int (*)(void *)

/* Filled in from the descriptor table below when VFS initializes. */
int (**spec_vnodeop_p)(void *);

/*
 * Vnode operations vector for special files (character and block
 * devices).  Operations that make no sense on a device node map to
 * the generic err_* / nop_* stubs.
 */
struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vnop_default_desc, (VOPFUNC)vn_default_error },
	{ &vnop_lookup_desc, (VOPFUNC)spec_lookup },		/* lookup */
	{ &vnop_create_desc, (VOPFUNC)err_create },		/* create */
	{ &vnop_mknod_desc, (VOPFUNC)err_mknod },		/* mknod */
	{ &vnop_open_desc, (VOPFUNC)spec_open },		/* open */
	{ &vnop_close_desc, (VOPFUNC)spec_close },		/* close */
	{ &vnop_access_desc, (VOPFUNC)spec_access },		/* access */
	{ &vnop_getattr_desc, (VOPFUNC)spec_getattr },		/* getattr */
	{ &vnop_setattr_desc, (VOPFUNC)spec_setattr },		/* setattr */
	{ &vnop_read_desc, (VOPFUNC)spec_read },		/* read */
	{ &vnop_write_desc, (VOPFUNC)spec_write },		/* write */
	{ &vnop_ioctl_desc, (VOPFUNC)spec_ioctl },		/* ioctl */
	{ &vnop_select_desc, (VOPFUNC)spec_select },		/* select */
	{ &vnop_revoke_desc, (VOPFUNC)nop_revoke },		/* revoke */
	{ &vnop_mmap_desc, (VOPFUNC)err_mmap },			/* mmap */
	{ &vnop_fsync_desc, (VOPFUNC)spec_fsync },		/* fsync */
	{ &vnop_remove_desc, (VOPFUNC)err_remove },		/* remove */
	{ &vnop_link_desc, (VOPFUNC)err_link },			/* link */
	{ &vnop_rename_desc, (VOPFUNC)err_rename },		/* rename */
	{ &vnop_mkdir_desc, (VOPFUNC)err_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (VOPFUNC)err_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (VOPFUNC)err_symlink },		/* symlink */
	{ &vnop_readdir_desc, (VOPFUNC)err_readdir },		/* readdir */
	{ &vnop_readlink_desc, (VOPFUNC)err_readlink },		/* readlink */
	{ &vnop_inactive_desc, (VOPFUNC)nop_inactive },		/* inactive */
	{ &vnop_reclaim_desc, (VOPFUNC)nop_reclaim },		/* reclaim */
	{ &vnop_strategy_desc, (VOPFUNC)spec_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (VOPFUNC)spec_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (VOPFUNC)err_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (VOPFUNC)spec_bwrite },		/* bwrite */
	{ &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize },	/* devblocksize */
	{ &vnop_pagein_desc, (VOPFUNC)err_pagein },		/* Pagein */
	{ &vnop_pageout_desc, (VOPFUNC)err_pageout },		/* Pageout */
	{ &vnop_copyfile_desc, (VOPFUNC)err_copyfile },		/* Copyfile */
	{ &vnop_blktooff_desc, (VOPFUNC)spec_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (VOPFUNC)spec_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (VOPFUNC)spec_blockmap },	/* blockmap */
	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};

/* Binds the table above to spec_vnodeop_p for vfs_op_init(). */
struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };
138
139
140 static void set_blocksize(vnode_t, dev_t);
141
142
143 /*
144 * Trivial lookup routine that always fails.
145 */
146 int
147 spec_lookup(ap)
148 struct vnop_lookup_args /* {
149 struct vnode *a_dvp;
150 struct vnode **a_vpp;
151 struct componentname *a_cnp;
152 vfs_context_t a_context;
153 } */ *ap;
154 {
155
156 *ap->a_vpp = NULL;
157 return (ENOTDIR);
158 }
159
160 static void
161 set_blocksize(struct vnode *vp, dev_t dev)
162 {
163 int (*size)(dev_t);
164 int rsize;
165
166 if ((major(dev) < nblkdev) && (size = bdevsw[major(dev)].d_psize)) {
167 rsize = (*size)(dev);
168 if (rsize <= 0) /* did size fail? */
169 vp->v_specsize = DEV_BSIZE;
170 else
171 vp->v_specsize = rsize;
172 }
173 else
174 vp->v_specsize = DEV_BSIZE;
175 }
176
177 void
178 set_fsblocksize(struct vnode *vp)
179 {
180
181 if (vp->v_type == VBLK) {
182 dev_t dev = (dev_t)vp->v_rdev;
183 int maj = major(dev);
184
185 if ((u_int)maj >= (u_int)nblkdev)
186 return;
187
188 vnode_lock(vp);
189 set_blocksize(vp, dev);
190 vnode_unlock(vp);
191 }
192
193 }
194
195
196 /*
197 * Open a special file.
198 */
int
spec_open(ap)
	struct vnop_open_args /* {
		struct vnode *a_vp;
		int  a_mode;
		vfs_context_t a_context;
	} */ *ap;
{
	struct proc *p = vfs_context_proc(ap->a_context);
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	struct vnode *vp = ap->a_vp;
	dev_t bdev, dev = (dev_t)vp->v_rdev;
	int maj = major(dev);
	int error;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (vp->v_type) {

	case VCHR:
		/* Reject majors with no registered character driver. */
		if ((u_int)maj >= (u_int)nchrdev)
			return (ENXIO);
		if (cred != FSCRED && (ap->a_mode & FWRITE)) {
			/*
			 * When running in very secure mode, do not allow
			 * opens for writing of any disk character devices.
			 */
			if (securelevel >= 2 && isdisk(dev, VCHR))
				return (EPERM);
			/*
			 * When running in secure mode, do not allow opens
			 * for writing of /dev/mem, /dev/kmem, or character
			 * devices whose corresponding block devices are
			 * currently mounted.
			 */
			if (securelevel >= 1) {
				if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error))
					return (error);
				if (iskmemdev(dev))
					return (EPERM);
			}
		}
		/* Mark tty devices so select/ioctl paths can special-case them. */
		if (cdevsw[maj].d_type == D_TTY) {
			vnode_lock(vp);
			vp->v_flag |= VISTTY;
			vnode_unlock(vp);
		}
		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
		return (error);

	case VBLK:
		if ((u_int)maj >= (u_int)nblkdev)
			return (ENXIO);
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disk block devices.
		 */
		if (securelevel >= 2 && cred != FSCRED &&
		    (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
			return (EPERM);
		/*
		 * Do not allow opens of block devices that are
		 * currently mounted.
		 */
		if ( (error = vfs_mountedon(vp)) )
			return (error);
		error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p);
		if (!error) {
			u_int64_t blkcnt;
			u_int32_t blksize;
			int setsize = 0;
			u_int32_t size512 = 512;

			/*
			 * Probe the device capacity: temporarily switch the
			 * device to 512-byte sectors, read the 512-byte block
			 * count, then restore the original block size.
			 */
			if (!VNOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, ap->a_context)) {
				/* Switch to 512 byte sectors (temporarily) */

				if (!VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, ap->a_context)) {
					/* Get the number of 512 byte physical blocks. */
					if (!VNOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, ap->a_context)) {
						setsize = 1;
					}
				}
				/* If it doesn't set back, we can't recover */
				if (VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, ap->a_context))
					error = ENXIO;
				/*
				 * NOTE(review): on this ENXIO path the driver's
				 * d_open already succeeded but is never closed
				 * here — confirm callers tolerate that.
				 */
			}

			vnode_lock(vp);
			set_blocksize(vp, dev);

			/*
			 * Cache the size in bytes of the block device for later
			 * use by spec_write().
			 */
			if (setsize)
				vp->v_specdevsize = blkcnt * (u_int64_t)size512;
			else
				vp->v_specdevsize = (u_int64_t)0;	/* Default: Can't get */

			vnode_unlock(vp);

		}
		return(error);
	default:
		panic("spec_open type");
	}
	return (0);
}
313
314 /*
315 * Vnode op for read
316 */
int
spec_read(ap)
	struct vnop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		vfs_context_t a_context;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct buf *bp;
	daddr64_t bn, nextbn;
	long bsize, bscale;
	int devBlockSize=0;
	int n, on;
	int error = 0;
	dev_t dev;

#if DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
		panic("spec_read proc");
#endif
	if (uio_resid(uio) == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		/* Character devices: hand the uio straight to the driver. */
		error = (*cdevsw[major(vp->v_rdev)].d_read)
		       (vp->v_rdev, uio, ap->a_ioflag);
		return (error);

	case VBLK:
		if (uio->uio_offset < 0)
			return (EINVAL);

		dev = vp->v_rdev;

		devBlockSize = vp->v_specsize;

		if (devBlockSize > PAGE_SIZE)
			return (EINVAL);

		/*
		 * Cache buffers cover one page's worth of device blocks:
		 * bscale = blocks per buffer, bsize = buffer size in bytes.
		 */
		bscale = PAGE_SIZE / devBlockSize;
		bsize = bscale * devBlockSize;

		do {
			/* Byte offset within the buffer, and the buffer-aligned
			 * starting block number for this iteration. */
			on = uio->uio_offset % bsize;

			bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ (bscale - 1));

			/*
			 * Sequential-read heuristic: if this buffer directly
			 * follows the previously read one, also issue a
			 * read-ahead for the next buffer.
			 * NOTE(review): v_speclastr is read here without the
			 * vnode lock (it is only written under it below) —
			 * confirm the unlocked read is intentional.
			 */
			if (vp->v_speclastr + bscale == bn) {
				nextbn = bn + bscale;
				error = buf_breadn(vp, bn, (int)bsize, &nextbn,
					       (int *)&bsize, 1, NOCRED, &bp);
			} else
		        	error = buf_bread(vp, bn, (int)bsize, NOCRED, &bp);

			vnode_lock(vp);
			vp->v_speclastr = bn;
			vnode_unlock(vp);

			/* n = valid bytes in the buffer; bail if the read failed
			 * or didn't even reach our starting offset. */
			n = bsize - buf_resid(bp);
			if ((on > n) || error) {
				if (!error)
					error = EINVAL;
				buf_brelse(bp);
				return (error);
			}
			// LP64todo - fix this!
			n = min((unsigned)(n - on), uio_resid(uio));

			error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
			if (n + on == bsize)
				buf_markaged(bp);	/* fully consumed: let it recycle early */
			buf_brelse(bp);
		} while (error == 0 && uio_resid(uio) > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */

	return (0);
}
406
407 /*
408 * Vnode op for write
409 */
410 int
411 spec_write(ap)
412 struct vnop_write_args /* {
413 struct vnode *a_vp;
414 struct uio *a_uio;
415 int a_ioflag;
416 vfs_context_t a_context;
417 } */ *ap;
418 {
419 register struct vnode *vp = ap->a_vp;
420 register struct uio *uio = ap->a_uio;
421 struct buf *bp;
422 daddr64_t bn;
423 int bsize, blkmask, bscale;
424 register int io_sync;
425 register int io_size;
426 int devBlockSize=0;
427 register int n, on;
428 int error = 0;
429 dev_t dev;
430
431 #if DIAGNOSTIC
432 if (uio->uio_rw != UIO_WRITE)
433 panic("spec_write mode");
434 if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
435 panic("spec_write proc");
436 #endif
437
438 switch (vp->v_type) {
439
440 case VCHR:
441 error = (*cdevsw[major(vp->v_rdev)].d_write)
442 (vp->v_rdev, uio, ap->a_ioflag);
443 return (error);
444
445 case VBLK:
446 if (uio_resid(uio) == 0)
447 return (0);
448 if (uio->uio_offset < 0)
449 return (EINVAL);
450
451 io_sync = (ap->a_ioflag & IO_SYNC);
452 // LP64todo - fix this!
453 io_size = uio_resid(uio);
454
455 dev = (vp->v_rdev);
456
457 devBlockSize = vp->v_specsize;
458 if (devBlockSize > PAGE_SIZE)
459 return(EINVAL);
460
461 bscale = PAGE_SIZE / devBlockSize;
462 blkmask = bscale - 1;
463 bsize = bscale * devBlockSize;
464
465
466 do {
467 bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ blkmask);
468 on = uio->uio_offset % bsize;
469
470 // LP64todo - fix this!
471 n = min((unsigned)(bsize - on), uio_resid(uio));
472
473 /*
474 * Use buf_getblk() as an optimization IFF:
475 *
476 * 1) We are reading exactly a block on a block
477 * aligned boundary
478 * 2) We know the size of the device from spec_open
479 * 3) The read doesn't span the end of the device
480 *
481 * Otherwise, we fall back on buf_bread().
482 */
483 if (n == bsize &&
484 vp->v_specdevsize != (u_int64_t)0 &&
485 (uio->uio_offset + (u_int64_t)n) > vp->v_specdevsize) {
486 /* reduce the size of the read to what is there */
487 n = (uio->uio_offset + (u_int64_t)n) - vp->v_specdevsize;
488 }
489
490 if (n == bsize)
491 bp = buf_getblk(vp, bn, bsize, 0, 0, BLK_WRITE);
492 else
493 error = (int)buf_bread(vp, bn, bsize, NOCRED, &bp);
494
495 /* Translate downstream error for upstream, if needed */
496 if (!error)
497 error = (int)buf_error(bp);
498 if (error) {
499 buf_brelse(bp);
500 return (error);
501 }
502 n = min(n, bsize - buf_resid(bp));
503
504 error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
505 if (error) {
506 buf_brelse(bp);
507 return (error);
508 }
509 buf_markaged(bp);
510
511 if (io_sync)
512 error = buf_bwrite(bp);
513 else {
514 if ((n + on) == bsize)
515 error = buf_bawrite(bp);
516 else
517 error = buf_bdwrite(bp);
518 }
519 } while (error == 0 && uio_resid(uio) > 0 && n != 0);
520 return (error);
521
522 default:
523 panic("spec_write type");
524 }
525 /* NOTREACHED */
526
527 return (0);
528 }
529
530 /*
531 * Device ioctl operation.
532 */
int
spec_ioctl(ap)
	struct vnop_ioctl_args /* {
		struct vnode *a_vp;
		int  a_command;
		caddr_t  a_data;
		int  a_fflag;
		vfs_context_t a_context;
	} */ *ap;
{
	proc_t p = vfs_context_proc(ap->a_context);
	dev_t dev = ap->a_vp->v_rdev;

	switch (ap->a_vp->v_type) {

	case VCHR:
		/* Forward to the character driver's ioctl entry point. */
		return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
						      ap->a_fflag, p));

	case VBLK:
		/*
		 * Legacy probe: command 0 with data B_TAPE asks whether the
		 * device is a tape drive (0) or not (1).
		 * NOTE(review): casting the a_data pointer to int truncates
		 * on LP64 — confirm only the B_TAPE sentinel reaches here.
		 */
		if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) {
			if (bdevsw[major(dev)].d_type == D_TAPE)
				return (0);
			else
				return (1);
		}
		return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
						      ap->a_fflag, p));

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
	return (0);
}
568
569 int
570 spec_select(ap)
571 struct vnop_select_args /* {
572 struct vnode *a_vp;
573 int a_which;
574 int a_fflags;
575 void * a_wql;
576 vfs_context_t a_context;
577 } */ *ap;
578 {
579 proc_t p = vfs_context_proc(ap->a_context);
580 register dev_t dev;
581
582 switch (ap->a_vp->v_type) {
583
584 default:
585 return (1); /* XXX */
586
587 case VCHR:
588 dev = ap->a_vp->v_rdev;
589 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_wql, p);
590 }
591 }
592
593 /*
594 * Synch buffers associated with a block device
595 */
596 int
597 spec_fsync_internal(vnode_t vp, int waitfor, __unused vfs_context_t context)
598 {
599 if (vp->v_type == VCHR)
600 return (0);
601 /*
602 * Flush all dirty buffers associated with a block device.
603 */
604 buf_flushdirtyblks(vp, waitfor == MNT_WAIT, 0, (char *)"spec_fsync");
605
606 return (0);
607 }
608
609 int
610 spec_fsync(ap)
611 struct vnop_fsync_args /* {
612 struct vnode *a_vp;
613 int a_waitfor;
614 vfs_context_t a_context;
615 } */ *ap;
616 {
617 return spec_fsync_internal(ap->a_vp, ap->a_waitfor, ap->a_context);
618 }
619
620 /*
621 * Just call the device strategy routine
622 */
/* Set here when root-device pageins are observed (read by VM paging code). */
extern int hard_throttle_on_root;


/* Defaults for the low-priority I/O throttle used by spec_strategy(). */
#define LOWPRI_DELAY_MSECS 200
#define LOWPRI_WINDOW_MSECS 200

/* Tunable copies of the defaults; either set to 0 disables throttling. */
int lowpri_IO_window_msecs = LOWPRI_WINDOW_MSECS;
int lowpri_IO_delay_msecs = LOWPRI_DELAY_MSECS;

/* Timestamps of the most recent normal- and low-priority I/Os. */
struct timeval last_normal_IO_timestamp;
struct timeval last_lowpri_IO_timestamp;
/* Window within which a low-priority I/O after a normal one gets delayed. */
struct timeval lowpri_IO_window = { 0, LOWPRI_WINDOW_MSECS * 1000 };
635
636 int
637 spec_strategy(ap)
638 struct vnop_strategy_args /* {
639 struct buf *a_bp;
640 } */ *ap;
641 {
642 buf_t bp;
643 int bflags;
644 dev_t bdev;
645 proc_t p;
646 struct timeval elapsed;
647
648 bp = ap->a_bp;
649 bdev = buf_device(bp);
650 bflags = buf_flags(bp);
651
652 if (kdebug_enable) {
653 int code = 0;
654
655 if (bflags & B_READ)
656 code |= DKIO_READ;
657 if (bflags & B_ASYNC)
658 code |= DKIO_ASYNC;
659
660 if (bflags & B_META)
661 code |= DKIO_META;
662 else if (bflags & B_PAGEIO)
663 code |= DKIO_PAGING;
664
665 KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
666 (unsigned int)bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
667 }
668 if (((bflags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
669 (buf_vnode(bp)->v_mount->mnt_kern_flag & MNTK_ROOTDEV))
670 hard_throttle_on_root = 1;
671
672 if ( lowpri_IO_delay_msecs && lowpri_IO_window_msecs ) {
673 p = current_proc();
674
675 if ( (p == NULL) || !(p->p_lflag & P_LLOW_PRI_IO)) {
676 if (!(p->p_lflag & P_LBACKGROUND_IO))
677 microuptime(&last_normal_IO_timestamp);
678 } else {
679 microuptime(&last_lowpri_IO_timestamp);
680
681 elapsed = last_lowpri_IO_timestamp;
682 timevalsub(&elapsed, &last_normal_IO_timestamp);
683
684 lowpri_IO_window.tv_sec = lowpri_IO_window_msecs / 1000;
685 lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000;
686
687 if (timevalcmp(&elapsed, &lowpri_IO_window, <)) {
688 struct uthread *ut;
689
690 /*
691 * I'd really like to do the IOSleep here, but
692 * we may be holding all kinds of filesystem related locks
693 * and the pages for this I/O marked 'busy'...
694 * we don't want to cause a normal task to block on
695 * one of these locks while we're throttling a task marked
696 * for low priority I/O... we'll mark the uthread and
697 * do the delay just before we return from the system
698 * call that triggered this I/O or from vnode_pagein
699 */
700 ut = get_bsdthread_info(current_thread());
701 ut->uu_lowpri_delay = lowpri_IO_delay_msecs;
702 }
703 }
704 }
705 (*bdevsw[major(bdev)].d_strategy)(bp);
706
707 return (0);
708 }
709
710
/*
 * Logical-to-physical block mapping is not supported for special files.
 */
int
spec_blockmap(struct vnop_blockmap_args *ap)
{
	(void)ap;		/* argument unused: specfs has no block mapping */
	return (ENOTSUP);
}
719
720
721 /*
722 * Device close routine
723 */
int
spec_close(ap)
	struct vnop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		vfs_context_t a_context;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	dev_t dev = vp->v_rdev;
	int (*devclose)(dev_t, int, int, struct proc *);
	int mode, error;
	struct proc *p = vfs_context_proc(ap->a_context);

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.
		 * We cannot easily tell that a character device is
		 * a controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case,
		 * if the reference count is 2 (this last descriptor
		 * plus the session), release the reference from the session.
		 */
		if (vcount(vp) == 2 && p &&
		    vp == p->p_session->s_ttyvp) {
			p->p_session->s_ttyvp = NULL;
			vnode_rele(vp);
		}
		/*
		 * close on last reference.
		 */
		if (vcount(vp) > 1)
			return (0);
		devclose = cdevsw[major(dev)].d_close;
		mode = S_IFCHR;
		break;

	case VBLK:
		/*
		 * The two arms below perform the same work (flush +
		 * invalidate + last-reference check) but in opposite
		 * order, depending on whether devfs provides locking.
		 */
#ifdef DEVFS_IMPLEMENTS_LOCKING
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context)))
			return (error);

		error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
		if (error)
			return (error);
		/*
		 * Since every use (buffer, vnode, swap, blockmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if (vcount(vp) > 1)
			return (0);
#else /* DEVFS_IMPLEMENTS_LOCKING */
		/*
		 * Since every use (buffer, vnode, swap, blockmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if (vcount(vp) > 1)
			return (0);

		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context)))
			return (error);

		error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
		if (error)
			return (error);
#endif /* DEVFS_IMPLEMENTS_LOCKING */
		devclose = bdevsw[major(dev)].d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	/* Invoke the driver's close entry point selected above. */
	return ((*devclose)(dev, ap->a_fflag, mode, p));
}
819
820 /*
821 * Return POSIX pathconf information applicable to special devices.
822 */
823 int
824 spec_pathconf(ap)
825 struct vnop_pathconf_args /* {
826 struct vnode *a_vp;
827 int a_name;
828 int *a_retval;
829 vfs_context_t a_context;
830 } */ *ap;
831 {
832
833 switch (ap->a_name) {
834 case _PC_LINK_MAX:
835 *ap->a_retval = LINK_MAX;
836 return (0);
837 case _PC_MAX_CANON:
838 *ap->a_retval = MAX_CANON;
839 return (0);
840 case _PC_MAX_INPUT:
841 *ap->a_retval = MAX_INPUT;
842 return (0);
843 case _PC_PIPE_BUF:
844 *ap->a_retval = PIPE_BUF;
845 return (0);
846 case _PC_CHOWN_RESTRICTED:
847 *ap->a_retval = 1;
848 return (0);
849 case _PC_VDISABLE:
850 *ap->a_retval = _POSIX_VDISABLE;
851 return (0);
852 default:
853 return (EINVAL);
854 }
855 /* NOTREACHED */
856 }
857
858 int
859 spec_devblocksize(ap)
860 struct vnop_devblocksize_args /* {
861 struct vnode *a_vp;
862 int *a_retval;
863 } */ *ap;
864 {
865 *ap->a_retval = (ap->a_vp->v_specsize);
866 return (0);
867 }
868
869 /*
870 * Special device failed operation
871 */
int
spec_ebadf(void *dummy)
{
	(void)dummy;		/* argument ignored */
	return (EBADF);
}
878
879 /*
880 * Special device bad operation
881 */
int
spec_badop()
{
	/* Installed for operations that must never reach specfs. */
	panic("spec_badop called");
	/* NOTREACHED */
}
889
890 /* Blktooff derives file offset from logical block number */
891 int
892 spec_blktooff(ap)
893 struct vnop_blktooff_args /* {
894 struct vnode *a_vp;
895 daddr64_t a_lblkno;
896 off_t *a_offset;
897 } */ *ap;
898 {
899 register struct vnode *vp = ap->a_vp;
900
901 switch (vp->v_type) {
902 case VCHR:
903 *ap->a_offset = (off_t)-1; /* failure */
904 return (ENOTSUP);
905
906 case VBLK:
907 printf("spec_blktooff: not implemented for VBLK\n");
908 *ap->a_offset = (off_t)-1; /* failure */
909 return (ENOTSUP);
910
911 default:
912 panic("spec_blktooff type");
913 }
914 /* NOTREACHED */
915
916 return (0);
917 }
918
919 /* Offtoblk derives logical block number from file offset */
920 int
921 spec_offtoblk(ap)
922 struct vnop_offtoblk_args /* {
923 struct vnode *a_vp;
924 off_t a_offset;
925 daddr64_t *a_lblkno;
926 } */ *ap;
927 {
928 register struct vnode *vp = ap->a_vp;
929
930 switch (vp->v_type) {
931 case VCHR:
932 *ap->a_lblkno = (daddr64_t)-1; /* failure */
933 return (ENOTSUP);
934
935 case VBLK:
936 printf("spec_offtoblk: not implemented for VBLK\n");
937 *ap->a_lblkno = (daddr64_t)-1; /* failure */
938 return (ENOTSUP);
939
940 default:
941 panic("spec_offtoblk type");
942 }
943 /* NOTREACHED */
944
945 return (0);
946 }