/* Source: apple/xnu — bsd/miscfs/specfs/spec_vnops.c
 * (mirrored via git.saurik.com, blob 7cb75e4cd51b5b2155d2b53957116c0acdd2b024) */
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1989, 1993, 1995
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95
56 */
57
58 #include <sys/param.h>
59 #include <sys/proc_internal.h>
60 #include <sys/kauth.h>
61 #include <sys/systm.h>
62 #include <sys/kernel.h>
63 #include <sys/conf.h>
64 #include <sys/buf_internal.h>
65 #include <sys/mount_internal.h>
66 #include <sys/namei.h>
67 #include <sys/vnode_internal.h>
68 #include <sys/stat.h>
69 #include <sys/errno.h>
70 #include <sys/ioctl.h>
71 #include <sys/file.h>
72 #include <sys/user.h>
73 #include <sys/malloc.h>
74 #include <sys/disk.h>
75 #include <sys/uio_internal.h>
76 #include <miscfs/specfs/specdev.h>
77 #include <vfs/vfs_support.h>
78
79 #include <sys/kdebug.h>
80
/* Hash chains of active special-device vnodes, keyed by device number. */
struct vnode *speclisth[SPECHSZ];

/* symbolic sleep message strings for devices */
char	devopn[] = "devopn";
char	devio[] = "devio";
char	devwait[] = "devwait";
char	devin[] = "devin";
char	devout[] = "devout";
char	devioc[] = "devioc";
char	devcls[] = "devcls";

/* Common cast used to enter heterogeneous vnop handlers in the table below. */
#define VOPFUNC int (*)(void *)
/*
 * Operation vector for special-device (character/block) vnodes.
 * Operations that make no sense on a device node are wired to the
 * generic err_* stubs; no-ops use the nop_* stubs.  The VFS layer
 * fills in spec_vnodeop_p from this table at registration time.
 */
int (**spec_vnodeop_p)(void *);
struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vnop_default_desc, (VOPFUNC)vn_default_error },
	{ &vnop_lookup_desc, (VOPFUNC)spec_lookup },		/* lookup */
	{ &vnop_create_desc, (VOPFUNC)err_create },		/* create */
	{ &vnop_mknod_desc, (VOPFUNC)err_mknod },		/* mknod */
	{ &vnop_open_desc, (VOPFUNC)spec_open },			/* open */
	{ &vnop_close_desc, (VOPFUNC)spec_close },		/* close */
	{ &vnop_access_desc, (VOPFUNC)spec_access },		/* access */
	{ &vnop_getattr_desc, (VOPFUNC)spec_getattr },		/* getattr */
	{ &vnop_setattr_desc, (VOPFUNC)spec_setattr },		/* setattr */
	{ &vnop_read_desc, (VOPFUNC)spec_read },			/* read */
	{ &vnop_write_desc, (VOPFUNC)spec_write },		/* write */
	{ &vnop_ioctl_desc, (VOPFUNC)spec_ioctl },		/* ioctl */
	{ &vnop_select_desc, (VOPFUNC)spec_select },		/* select */
	{ &vnop_revoke_desc, (VOPFUNC)nop_revoke },		/* revoke */
	{ &vnop_mmap_desc, (VOPFUNC)err_mmap },			/* mmap */
	{ &vnop_fsync_desc, (VOPFUNC)spec_fsync },		/* fsync */
	{ &vnop_remove_desc, (VOPFUNC)err_remove },		/* remove */
	{ &vnop_link_desc, (VOPFUNC)err_link },			/* link */
	{ &vnop_rename_desc, (VOPFUNC)err_rename },		/* rename */
	{ &vnop_mkdir_desc, (VOPFUNC)err_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (VOPFUNC)err_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (VOPFUNC)err_symlink },		/* symlink */
	{ &vnop_readdir_desc, (VOPFUNC)err_readdir },		/* readdir */
	{ &vnop_readlink_desc, (VOPFUNC)err_readlink },		/* readlink */
	{ &vnop_inactive_desc, (VOPFUNC)nop_inactive },		/* inactive */
	{ &vnop_reclaim_desc, (VOPFUNC)nop_reclaim },		/* reclaim */
	{ &vnop_strategy_desc, (VOPFUNC)spec_strategy },		/* strategy */
	{ &vnop_pathconf_desc, (VOPFUNC)spec_pathconf },		/* pathconf */
	{ &vnop_advlock_desc, (VOPFUNC)err_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (VOPFUNC)spec_bwrite },		/* bwrite */
	{ &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize },	/* devblocksize */
	{ &vnop_pagein_desc, (VOPFUNC)err_pagein },		/* Pagein */
	{ &vnop_pageout_desc, (VOPFUNC)err_pageout },		/* Pageout */
	{ &vnop_copyfile_desc, (VOPFUNC)err_copyfile },		/* Copyfile */
	{ &vnop_blktooff_desc, (VOPFUNC)spec_blktooff },		/* blktooff */
	{ &vnop_offtoblk_desc, (VOPFUNC)spec_offtoblk },		/* offtoblk */
	{ &vnop_blockmap_desc, (VOPFUNC)spec_blockmap },		/* blockmap */
	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};
struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };


/* Forward declaration: compute and cache a block device's sector size. */
static void set_blocksize(vnode_t, dev_t);
140
141
142 /*
143 * Trivial lookup routine that always fails.
144 */
145 int
146 spec_lookup(ap)
147 struct vnop_lookup_args /* {
148 struct vnode *a_dvp;
149 struct vnode **a_vpp;
150 struct componentname *a_cnp;
151 vfs_context_t a_context;
152 } */ *ap;
153 {
154
155 *ap->a_vpp = NULL;
156 return (ENOTDIR);
157 }
158
159 static void
160 set_blocksize(struct vnode *vp, dev_t dev)
161 {
162 int (*size)(dev_t);
163 int rsize;
164
165 if ((major(dev) < nblkdev) && (size = bdevsw[major(dev)].d_psize)) {
166 rsize = (*size)(dev);
167 if (rsize <= 0) /* did size fail? */
168 vp->v_specsize = DEV_BSIZE;
169 else
170 vp->v_specsize = rsize;
171 }
172 else
173 vp->v_specsize = DEV_BSIZE;
174 }
175
176 void
177 set_fsblocksize(struct vnode *vp)
178 {
179
180 if (vp->v_type == VBLK) {
181 dev_t dev = (dev_t)vp->v_rdev;
182 int maj = major(dev);
183
184 if ((u_int)maj >= (u_int)nblkdev)
185 return;
186
187 vnode_lock(vp);
188 set_blocksize(vp, dev);
189 vnode_unlock(vp);
190 }
191
192 }
193
194
195 /*
196 * Open a special file.
197 */
198 int
199 spec_open(ap)
200 struct vnop_open_args /* {
201 struct vnode *a_vp;
202 int a_mode;
203 vfs_context_t a_context;
204 } */ *ap;
205 {
206 struct proc *p = vfs_context_proc(ap->a_context);
207 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
208 struct vnode *vp = ap->a_vp;
209 dev_t bdev, dev = (dev_t)vp->v_rdev;
210 int maj = major(dev);
211 int error;
212
213 /*
214 * Don't allow open if fs is mounted -nodev.
215 */
216 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
217 return (ENXIO);
218
219 switch (vp->v_type) {
220
221 case VCHR:
222 if ((u_int)maj >= (u_int)nchrdev)
223 return (ENXIO);
224 if (cred != FSCRED && (ap->a_mode & FWRITE)) {
225 /*
226 * When running in very secure mode, do not allow
227 * opens for writing of any disk character devices.
228 */
229 if (securelevel >= 2 && isdisk(dev, VCHR))
230 return (EPERM);
231 /*
232 * When running in secure mode, do not allow opens
233 * for writing of /dev/mem, /dev/kmem, or character
234 * devices whose corresponding block devices are
235 * currently mounted.
236 */
237 if (securelevel >= 1) {
238 if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error))
239 return (error);
240 if (iskmemdev(dev))
241 return (EPERM);
242 }
243 }
244 if (cdevsw[maj].d_type == D_TTY) {
245 vnode_lock(vp);
246 vp->v_flag |= VISTTY;
247 vnode_unlock(vp);
248 }
249 error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
250 return (error);
251
252 case VBLK:
253 if ((u_int)maj >= (u_int)nblkdev)
254 return (ENXIO);
255 /*
256 * When running in very secure mode, do not allow
257 * opens for writing of any disk block devices.
258 */
259 if (securelevel >= 2 && cred != FSCRED &&
260 (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
261 return (EPERM);
262 /*
263 * Do not allow opens of block devices that are
264 * currently mounted.
265 */
266 if ( (error = vfs_mountedon(vp)) )
267 return (error);
268 error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p);
269 if (!error) {
270 u_int64_t blkcnt;
271 u_int32_t blksize;
272 int setsize = 0;
273 u_int32_t size512 = 512;
274
275
276 if (!VNOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, ap->a_context)) {
277 /* Switch to 512 byte sectors (temporarily) */
278
279 if (!VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, ap->a_context)) {
280 /* Get the number of 512 byte physical blocks. */
281 if (!VNOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, ap->a_context)) {
282 setsize = 1;
283 }
284 }
285 /* If it doesn't set back, we can't recover */
286 if (VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, ap->a_context))
287 error = ENXIO;
288 }
289
290
291 vnode_lock(vp);
292 set_blocksize(vp, dev);
293
294 /*
295 * Cache the size in bytes of the block device for later
296 * use by spec_write().
297 */
298 if (setsize)
299 vp->v_specdevsize = blkcnt * (u_int64_t)size512;
300 else
301 vp->v_specdevsize = (u_int64_t)0; /* Default: Can't get */
302
303 vnode_unlock(vp);
304
305 }
306 return(error);
307 default:
308 panic("spec_open type");
309 }
310 return (0);
311 }
312
313 /*
314 * Vnode op for read
315 */
316 int
317 spec_read(ap)
318 struct vnop_read_args /* {
319 struct vnode *a_vp;
320 struct uio *a_uio;
321 int a_ioflag;
322 vfs_context_t a_context;
323 } */ *ap;
324 {
325 register struct vnode *vp = ap->a_vp;
326 register struct uio *uio = ap->a_uio;
327 struct buf *bp;
328 daddr64_t bn, nextbn;
329 long bsize, bscale;
330 int devBlockSize=0;
331 int n, on;
332 int error = 0;
333 dev_t dev;
334
335 #if DIAGNOSTIC
336 if (uio->uio_rw != UIO_READ)
337 panic("spec_read mode");
338 if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
339 panic("spec_read proc");
340 #endif
341 if (uio_resid(uio) == 0)
342 return (0);
343
344 switch (vp->v_type) {
345
346 case VCHR:
347 error = (*cdevsw[major(vp->v_rdev)].d_read)
348 (vp->v_rdev, uio, ap->a_ioflag);
349 return (error);
350
351 case VBLK:
352 if (uio->uio_offset < 0)
353 return (EINVAL);
354
355 dev = vp->v_rdev;
356
357 devBlockSize = vp->v_specsize;
358
359 if (devBlockSize > PAGE_SIZE)
360 return (EINVAL);
361
362 bscale = PAGE_SIZE / devBlockSize;
363 bsize = bscale * devBlockSize;
364
365 do {
366 on = uio->uio_offset % bsize;
367
368 bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ (bscale - 1));
369
370 if (vp->v_speclastr + bscale == bn) {
371 nextbn = bn + bscale;
372 error = buf_breadn(vp, bn, (int)bsize, &nextbn,
373 (int *)&bsize, 1, NOCRED, &bp);
374 } else
375 error = buf_bread(vp, bn, (int)bsize, NOCRED, &bp);
376
377 vnode_lock(vp);
378 vp->v_speclastr = bn;
379 vnode_unlock(vp);
380
381 n = bsize - buf_resid(bp);
382 if ((on > n) || error) {
383 if (!error)
384 error = EINVAL;
385 buf_brelse(bp);
386 return (error);
387 }
388 // LP64todo - fix this!
389 n = min((unsigned)(n - on), uio_resid(uio));
390
391 error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
392 if (n + on == bsize)
393 buf_markaged(bp);
394 buf_brelse(bp);
395 } while (error == 0 && uio_resid(uio) > 0 && n != 0);
396 return (error);
397
398 default:
399 panic("spec_read type");
400 }
401 /* NOTREACHED */
402
403 return (0);
404 }
405
406 /*
407 * Vnode op for write
408 */
409 int
410 spec_write(ap)
411 struct vnop_write_args /* {
412 struct vnode *a_vp;
413 struct uio *a_uio;
414 int a_ioflag;
415 vfs_context_t a_context;
416 } */ *ap;
417 {
418 register struct vnode *vp = ap->a_vp;
419 register struct uio *uio = ap->a_uio;
420 struct buf *bp;
421 daddr64_t bn;
422 int bsize, blkmask, bscale;
423 register int io_sync;
424 register int io_size;
425 int devBlockSize=0;
426 register int n, on;
427 int error = 0;
428 dev_t dev;
429
430 #if DIAGNOSTIC
431 if (uio->uio_rw != UIO_WRITE)
432 panic("spec_write mode");
433 if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
434 panic("spec_write proc");
435 #endif
436
437 switch (vp->v_type) {
438
439 case VCHR:
440 error = (*cdevsw[major(vp->v_rdev)].d_write)
441 (vp->v_rdev, uio, ap->a_ioflag);
442 return (error);
443
444 case VBLK:
445 if (uio_resid(uio) == 0)
446 return (0);
447 if (uio->uio_offset < 0)
448 return (EINVAL);
449
450 io_sync = (ap->a_ioflag & IO_SYNC);
451 // LP64todo - fix this!
452 io_size = uio_resid(uio);
453
454 dev = (vp->v_rdev);
455
456 devBlockSize = vp->v_specsize;
457 if (devBlockSize > PAGE_SIZE)
458 return(EINVAL);
459
460 bscale = PAGE_SIZE / devBlockSize;
461 blkmask = bscale - 1;
462 bsize = bscale * devBlockSize;
463
464
465 do {
466 bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ blkmask);
467 on = uio->uio_offset % bsize;
468
469 // LP64todo - fix this!
470 n = min((unsigned)(bsize - on), uio_resid(uio));
471
472 /*
473 * Use buf_getblk() as an optimization IFF:
474 *
475 * 1) We are reading exactly a block on a block
476 * aligned boundary
477 * 2) We know the size of the device from spec_open
478 * 3) The read doesn't span the end of the device
479 *
480 * Otherwise, we fall back on buf_bread().
481 */
482 if (n == bsize &&
483 vp->v_specdevsize != (u_int64_t)0 &&
484 (uio->uio_offset + (u_int64_t)n) > vp->v_specdevsize) {
485 /* reduce the size of the read to what is there */
486 n = (uio->uio_offset + (u_int64_t)n) - vp->v_specdevsize;
487 }
488
489 if (n == bsize)
490 bp = buf_getblk(vp, bn, bsize, 0, 0, BLK_WRITE);
491 else
492 error = (int)buf_bread(vp, bn, bsize, NOCRED, &bp);
493
494 /* Translate downstream error for upstream, if needed */
495 if (!error)
496 error = (int)buf_error(bp);
497 if (error) {
498 buf_brelse(bp);
499 return (error);
500 }
501 n = min(n, bsize - buf_resid(bp));
502
503 error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
504 if (error) {
505 buf_brelse(bp);
506 return (error);
507 }
508 buf_markaged(bp);
509
510 if (io_sync)
511 error = buf_bwrite(bp);
512 else {
513 if ((n + on) == bsize)
514 error = buf_bawrite(bp);
515 else
516 error = buf_bdwrite(bp);
517 }
518 } while (error == 0 && uio_resid(uio) > 0 && n != 0);
519 return (error);
520
521 default:
522 panic("spec_write type");
523 }
524 /* NOTREACHED */
525
526 return (0);
527 }
528
529 /*
530 * Device ioctl operation.
531 */
532 int
533 spec_ioctl(ap)
534 struct vnop_ioctl_args /* {
535 struct vnode *a_vp;
536 int a_command;
537 caddr_t a_data;
538 int a_fflag;
539 vfs_context_t a_context;
540 } */ *ap;
541 {
542 proc_t p = vfs_context_proc(ap->a_context);
543 dev_t dev = ap->a_vp->v_rdev;
544
545 switch (ap->a_vp->v_type) {
546
547 case VCHR:
548 return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
549 ap->a_fflag, p));
550
551 case VBLK:
552 if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) {
553 if (bdevsw[major(dev)].d_type == D_TAPE)
554 return (0);
555 else
556 return (1);
557 }
558 return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
559 ap->a_fflag, p));
560
561 default:
562 panic("spec_ioctl");
563 /* NOTREACHED */
564 }
565 return (0);
566 }
567
568 int
569 spec_select(ap)
570 struct vnop_select_args /* {
571 struct vnode *a_vp;
572 int a_which;
573 int a_fflags;
574 void * a_wql;
575 vfs_context_t a_context;
576 } */ *ap;
577 {
578 proc_t p = vfs_context_proc(ap->a_context);
579 register dev_t dev;
580
581 switch (ap->a_vp->v_type) {
582
583 default:
584 return (1); /* XXX */
585
586 case VCHR:
587 dev = ap->a_vp->v_rdev;
588 return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_wql, p);
589 }
590 }
591
592 /*
593 * Synch buffers associated with a block device
594 */
595 int
596 spec_fsync_internal(vnode_t vp, int waitfor, __unused vfs_context_t context)
597 {
598 if (vp->v_type == VCHR)
599 return (0);
600 /*
601 * Flush all dirty buffers associated with a block device.
602 */
603 buf_flushdirtyblks(vp, waitfor == MNT_WAIT, 0, (char *)"spec_fsync");
604
605 return (0);
606 }
607
608 int
609 spec_fsync(ap)
610 struct vnop_fsync_args /* {
611 struct vnode *a_vp;
612 int a_waitfor;
613 vfs_context_t a_context;
614 } */ *ap;
615 {
616 return spec_fsync_internal(ap->a_vp, ap->a_waitfor, ap->a_context);
617 }
618
619 /*
620 * Just call the device strategy routine
621 */
622 extern int hard_throttle_on_root;
623
624
625 #define LOWPRI_DELAY_MSECS 200
626 #define LOWPRI_WINDOW_MSECS 200
627
628 int lowpri_IO_window_msecs = LOWPRI_WINDOW_MSECS;
629 int lowpri_IO_delay_msecs = LOWPRI_DELAY_MSECS;
630
631 struct timeval last_normal_IO_timestamp;
632 struct timeval last_lowpri_IO_timestamp;
633 struct timeval lowpri_IO_window = { 0, LOWPRI_WINDOW_MSECS * 1000 };
634
635 int
636 spec_strategy(ap)
637 struct vnop_strategy_args /* {
638 struct buf *a_bp;
639 } */ *ap;
640 {
641 buf_t bp;
642 int bflags;
643 dev_t bdev;
644 proc_t p;
645 struct timeval elapsed;
646
647 bp = ap->a_bp;
648 bdev = buf_device(bp);
649 bflags = buf_flags(bp);
650
651 if (kdebug_enable) {
652 int code = 0;
653
654 if (bflags & B_READ)
655 code |= DKIO_READ;
656 if (bflags & B_ASYNC)
657 code |= DKIO_ASYNC;
658
659 if (bflags & B_META)
660 code |= DKIO_META;
661 else if (bflags & B_PAGEIO)
662 code |= DKIO_PAGING;
663
664 KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
665 (unsigned int)bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
666 }
667 if (((bflags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
668 (buf_vnode(bp)->v_mount->mnt_kern_flag & MNTK_ROOTDEV))
669 hard_throttle_on_root = 1;
670
671 if ( lowpri_IO_delay_msecs && lowpri_IO_window_msecs ) {
672 p = current_proc();
673
674 if ( (p == NULL) || !(p->p_lflag & P_LLOW_PRI_IO)) {
675 if (!(p->p_lflag & P_LBACKGROUND_IO))
676 microuptime(&last_normal_IO_timestamp);
677 } else {
678 microuptime(&last_lowpri_IO_timestamp);
679
680 elapsed = last_lowpri_IO_timestamp;
681 timevalsub(&elapsed, &last_normal_IO_timestamp);
682
683 lowpri_IO_window.tv_sec = lowpri_IO_window_msecs / 1000;
684 lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000;
685
686 if (timevalcmp(&elapsed, &lowpri_IO_window, <)) {
687 struct uthread *ut;
688
689 /*
690 * I'd really like to do the IOSleep here, but
691 * we may be holding all kinds of filesystem related locks
692 * and the pages for this I/O marked 'busy'...
693 * we don't want to cause a normal task to block on
694 * one of these locks while we're throttling a task marked
695 * for low priority I/O... we'll mark the uthread and
696 * do the delay just before we return from the system
697 * call that triggered this I/O or from vnode_pagein
698 */
699 ut = get_bsdthread_info(current_thread());
700 ut->uu_lowpri_delay = lowpri_IO_delay_msecs;
701 }
702 }
703 }
704 (*bdevsw[major(bdev)].d_strategy)(bp);
705
706 return (0);
707 }
708
709
710 /*
711 * This is a noop, simply returning what one has been given.
712 */
713 int
714 spec_blockmap(__unused struct vnop_blockmap_args *ap)
715 {
716 return (ENOTSUP);
717 }
718
719
720 /*
721 * Device close routine
722 */
723 int
724 spec_close(ap)
725 struct vnop_close_args /* {
726 struct vnode *a_vp;
727 int a_fflag;
728 vfs_context_t a_context;
729 } */ *ap;
730 {
731 register struct vnode *vp = ap->a_vp;
732 dev_t dev = vp->v_rdev;
733 int (*devclose)(dev_t, int, int, struct proc *);
734 int mode, error;
735 struct proc *p = vfs_context_proc(ap->a_context);
736
737 switch (vp->v_type) {
738
739 case VCHR:
740 /*
741 * Hack: a tty device that is a controlling terminal
742 * has a reference from the session structure.
743 * We cannot easily tell that a character device is
744 * a controlling terminal, unless it is the closing
745 * process' controlling terminal. In that case,
746 * if the reference count is 2 (this last descriptor
747 * plus the session), release the reference from the session.
748 */
749 if (vcount(vp) == 2 && p &&
750 vp == p->p_session->s_ttyvp) {
751 p->p_session->s_ttyvp = NULL;
752 vnode_rele(vp);
753 }
754 /*
755 * close on last reference.
756 */
757 if (vcount(vp) > 1)
758 return (0);
759 devclose = cdevsw[major(dev)].d_close;
760 mode = S_IFCHR;
761 break;
762
763 case VBLK:
764 #ifdef DEVFS_IMPLEMENTS_LOCKING
765 /*
766 * On last close of a block device (that isn't mounted)
767 * we must invalidate any in core blocks, so that
768 * we can, for instance, change floppy disks.
769 */
770 if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context)))
771 return (error);
772
773 error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
774 if (error)
775 return (error);
776 /*
777 * Since every use (buffer, vnode, swap, blockmap)
778 * holds a reference to the vnode, and because we mark
779 * any other vnodes that alias this device, when the
780 * sum of the reference counts on all the aliased
781 * vnodes descends to one, we are on last close.
782 */
783 if (vcount(vp) > 1)
784 return (0);
785 #else /* DEVFS_IMPLEMENTS_LOCKING */
786 /*
787 * Since every use (buffer, vnode, swap, blockmap)
788 * holds a reference to the vnode, and because we mark
789 * any other vnodes that alias this device, when the
790 * sum of the reference counts on all the aliased
791 * vnodes descends to one, we are on last close.
792 */
793 if (vcount(vp) > 1)
794 return (0);
795
796 /*
797 * On last close of a block device (that isn't mounted)
798 * we must invalidate any in core blocks, so that
799 * we can, for instance, change floppy disks.
800 */
801 if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context)))
802 return (error);
803
804 error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
805 if (error)
806 return (error);
807 #endif /* DEVFS_IMPLEMENTS_LOCKING */
808 devclose = bdevsw[major(dev)].d_close;
809 mode = S_IFBLK;
810 break;
811
812 default:
813 panic("spec_close: not special");
814 }
815
816 return ((*devclose)(dev, ap->a_fflag, mode, p));
817 }
818
819 /*
820 * Return POSIX pathconf information applicable to special devices.
821 */
822 int
823 spec_pathconf(ap)
824 struct vnop_pathconf_args /* {
825 struct vnode *a_vp;
826 int a_name;
827 int *a_retval;
828 vfs_context_t a_context;
829 } */ *ap;
830 {
831
832 switch (ap->a_name) {
833 case _PC_LINK_MAX:
834 *ap->a_retval = LINK_MAX;
835 return (0);
836 case _PC_MAX_CANON:
837 *ap->a_retval = MAX_CANON;
838 return (0);
839 case _PC_MAX_INPUT:
840 *ap->a_retval = MAX_INPUT;
841 return (0);
842 case _PC_PIPE_BUF:
843 *ap->a_retval = PIPE_BUF;
844 return (0);
845 case _PC_CHOWN_RESTRICTED:
846 *ap->a_retval = 1;
847 return (0);
848 case _PC_VDISABLE:
849 *ap->a_retval = _POSIX_VDISABLE;
850 return (0);
851 default:
852 return (EINVAL);
853 }
854 /* NOTREACHED */
855 }
856
857 int
858 spec_devblocksize(ap)
859 struct vnop_devblocksize_args /* {
860 struct vnode *a_vp;
861 int *a_retval;
862 } */ *ap;
863 {
864 *ap->a_retval = (ap->a_vp->v_specsize);
865 return (0);
866 }
867
868 /*
869 * Special device failed operation
870 */
871 int
872 spec_ebadf(__unused void *dummy)
873 {
874
875 return (EBADF);
876 }
877
878 /*
879 * Special device bad operation
880 */
881 int
882 spec_badop()
883 {
884
885 panic("spec_badop called");
886 /* NOTREACHED */
887 }
888
889 /* Blktooff derives file offset from logical block number */
890 int
891 spec_blktooff(ap)
892 struct vnop_blktooff_args /* {
893 struct vnode *a_vp;
894 daddr64_t a_lblkno;
895 off_t *a_offset;
896 } */ *ap;
897 {
898 register struct vnode *vp = ap->a_vp;
899
900 switch (vp->v_type) {
901 case VCHR:
902 *ap->a_offset = (off_t)-1; /* failure */
903 return (ENOTSUP);
904
905 case VBLK:
906 printf("spec_blktooff: not implemented for VBLK\n");
907 *ap->a_offset = (off_t)-1; /* failure */
908 return (ENOTSUP);
909
910 default:
911 panic("spec_blktooff type");
912 }
913 /* NOTREACHED */
914
915 return (0);
916 }
917
918 /* Offtoblk derives logical block number from file offset */
919 int
920 spec_offtoblk(ap)
921 struct vnop_offtoblk_args /* {
922 struct vnode *a_vp;
923 off_t a_offset;
924 daddr64_t *a_lblkno;
925 } */ *ap;
926 {
927 register struct vnode *vp = ap->a_vp;
928
929 switch (vp->v_type) {
930 case VCHR:
931 *ap->a_lblkno = (daddr64_t)-1; /* failure */
932 return (ENOTSUP);
933
934 case VBLK:
935 printf("spec_offtoblk: not implemented for VBLK\n");
936 *ap->a_lblkno = (daddr64_t)-1; /* failure */
937 return (ENOTSUP);
938
939 default:
940 panic("spec_offtoblk type");
941 }
942 /* NOTREACHED */
943
944 return (0);
945 }