/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spec_vnops.c	8.14 (Berkeley) 5/21/95
 */

#include <sys/param.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/buf_internal.h>
#include <sys/mount_internal.h>
#include <sys/namei.h>
#include <sys/vnode_internal.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/malloc.h>
#include <sys/disk.h>
#include <sys/uio_internal.h>
#include <miscfs/specfs/specdev.h>
#include <vfs/vfs_support.h>

#include <sys/kdebug.h>

struct vnode *speclisth[SPECHSZ];

/* symbolic sleep message strings for devices */
char	devopn[] = "devopn";
char	devio[] = "devio";
char	devwait[] = "devwait";
char	devin[] = "devin";
char	devout[] = "devout";
char	devioc[] = "devioc";
char	devcls[] = "devcls";

#define VOPFUNC int (*)(void *)

int (**spec_vnodeop_p)(void *);
struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vnop_default_desc, (VOPFUNC)vn_default_error },
	{ &vnop_lookup_desc, (VOPFUNC)spec_lookup },		/* lookup */
	{ &vnop_create_desc, (VOPFUNC)err_create },		/* create */
	{ &vnop_mknod_desc, (VOPFUNC)err_mknod },		/* mknod */
	{ &vnop_open_desc, (VOPFUNC)spec_open },		/* open */
	{ &vnop_close_desc, (VOPFUNC)spec_close },		/* close */
	{ &vnop_access_desc, (VOPFUNC)spec_access },		/* access */
	{ &vnop_getattr_desc, (VOPFUNC)spec_getattr },		/* getattr */
	{ &vnop_setattr_desc, (VOPFUNC)spec_setattr },		/* setattr */
	{ &vnop_read_desc, (VOPFUNC)spec_read },		/* read */
	{ &vnop_write_desc, (VOPFUNC)spec_write },		/* write */
	{ &vnop_ioctl_desc, (VOPFUNC)spec_ioctl },		/* ioctl */
	{ &vnop_select_desc, (VOPFUNC)spec_select },		/* select */
	{ &vnop_revoke_desc, (VOPFUNC)nop_revoke },		/* revoke */
	{ &vnop_mmap_desc, (VOPFUNC)err_mmap },			/* mmap */
	{ &vnop_fsync_desc, (VOPFUNC)spec_fsync },		/* fsync */
	{ &vnop_remove_desc, (VOPFUNC)err_remove },		/* remove */
	{ &vnop_link_desc, (VOPFUNC)err_link },			/* link */
	{ &vnop_rename_desc, (VOPFUNC)err_rename },		/* rename */
	{ &vnop_mkdir_desc, (VOPFUNC)err_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (VOPFUNC)err_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (VOPFUNC)err_symlink },		/* symlink */
	{ &vnop_readdir_desc, (VOPFUNC)err_readdir },		/* readdir */
	{ &vnop_readlink_desc, (VOPFUNC)err_readlink },		/* readlink */
	{ &vnop_inactive_desc, (VOPFUNC)nop_inactive },		/* inactive */
	{ &vnop_reclaim_desc, (VOPFUNC)nop_reclaim },		/* reclaim */
	{ &vnop_strategy_desc, (VOPFUNC)spec_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (VOPFUNC)spec_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (VOPFUNC)err_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (VOPFUNC)spec_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (VOPFUNC)err_pagein },		/* Pagein */
	{ &vnop_pageout_desc, (VOPFUNC)err_pageout },		/* Pageout */
	{ &vnop_copyfile_desc, (VOPFUNC)err_copyfile },		/* Copyfile */
	{ &vnop_blktooff_desc, (VOPFUNC)spec_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (VOPFUNC)spec_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (VOPFUNC)spec_blockmap },	/* blockmap */
	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};
struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };


static void set_blocksize(vnode_t, dev_t);


/*
 * Trivial lookup routine that always fails.
 */
int
spec_lookup(ap)
	struct vnop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		vfs_context_t a_context;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

static void
set_blocksize(struct vnode *vp, dev_t dev)
{
	int (*size)(dev_t);
	int rsize;

	if ((major(dev) < nblkdev) && (size = bdevsw[major(dev)].d_psize)) {
		rsize = (*size)(dev);
		if (rsize <= 0)			/* did size fail? */
			vp->v_specsize = DEV_BSIZE;
		else
			vp->v_specsize = rsize;
	}
	else
		vp->v_specsize = DEV_BSIZE;
}

void
set_fsblocksize(struct vnode *vp)
{

	if (vp->v_type == VBLK) {
		dev_t dev = (dev_t)vp->v_rdev;
		int maj = major(dev);

		if ((u_int)maj >= (u_int)nblkdev)
			return;

		vnode_lock(vp);
		set_blocksize(vp, dev);
		vnode_unlock(vp);
	}

}


/*
 * Open a special file.
 */
int
spec_open(ap)
	struct vnop_open_args /* {
		struct vnode *a_vp;
		int  a_mode;
		vfs_context_t a_context;
	} */ *ap;
{
	struct proc *p = vfs_context_proc(ap->a_context);
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	struct vnode *vp = ap->a_vp;
	dev_t bdev, dev = (dev_t)vp->v_rdev;
	int maj = major(dev);
	int error;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (vp->v_type) {

	case VCHR:
		if ((u_int)maj >= (u_int)nchrdev)
			return (ENXIO);
		if (cred != FSCRED && (ap->a_mode & FWRITE)) {
			/*
			 * When running in very secure mode, do not allow
			 * opens for writing of any disk character devices.
			 */
			if (securelevel >= 2 && isdisk(dev, VCHR))
				return (EPERM);
			/*
			 * When running in secure mode, do not allow opens
			 * for writing of /dev/mem, /dev/kmem, or character
			 * devices whose corresponding block devices are
			 * currently mounted.
			 */
			if (securelevel >= 1) {
				if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error))
					return (error);
				if (iskmemdev(dev))
					return (EPERM);
			}
		}
		if (cdevsw[maj].d_type == D_TTY) {
			vnode_lock(vp);
			vp->v_flag |= VISTTY;
			vnode_unlock(vp);
		}
		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
		return (error);

	case VBLK:
		if ((u_int)maj >= (u_int)nblkdev)
			return (ENXIO);
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disk block devices.
		 */
		if (securelevel >= 2 && cred != FSCRED &&
		    (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
			return (EPERM);
		/*
		 * Do not allow opens of block devices that are
		 * currently mounted.
		 */
		if ( (error = vfs_mountedon(vp)) )
			return (error);
		error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p);
		if (!error) {
			u_int64_t blkcnt;
			u_int32_t blksize;
			int setsize = 0;
			u_int32_t size512 = 512;

			if (!VNOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, ap->a_context)) {
				/* Switch to 512 byte sectors (temporarily) */
				if (!VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, ap->a_context)) {
					/* Get the number of 512 byte physical blocks. */
					if (!VNOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, ap->a_context)) {
						setsize = 1;
					}
				}
				/* If it doesn't set back, we can't recover */
				if (VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, ap->a_context))
					error = ENXIO;
			}

			vnode_lock(vp);
			set_blocksize(vp, dev);

			/*
			 * Cache the size in bytes of the block device for later
			 * use by spec_write().
			 */
			if (setsize)
				vp->v_specdevsize = blkcnt * (u_int64_t)size512;
			else
				vp->v_specdevsize = (u_int64_t)0;	/* Default: Can't get */

			vnode_unlock(vp);
		}
		return (error);
	default:
		panic("spec_open type");
	}
	return (0);
}
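
/*
 * Illustrative sketch (not part of the original file): the same
 * DKIOCGETBLOCKSIZE / DKIOCGETBLOCKCOUNT probing that spec_open()
 * performs above via VNOP_IOCTL() can be issued from user space with
 * ioctl(2) against a device node.  The device path and the helper
 * name below are assumptions for illustration only.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/disk.h>
 *
 *	static int
 *	print_device_size(const char *devpath)	// e.g. "/dev/rdisk1" (hypothetical)
 *	{
 *		uint32_t blksize;
 *		uint64_t blkcnt;
 *		int fd = open(devpath, O_RDONLY);
 *
 *		if (fd < 0)
 *			return (-1);
 *		// Native sector size, then sector count; the product is the
 *		// device capacity in bytes (the quantity spec_open() caches
 *		// in v_specdevsize using 512-byte units).
 *		if (ioctl(fd, DKIOCGETBLOCKSIZE, &blksize) == -1 ||
 *		    ioctl(fd, DKIOCGETBLOCKCOUNT, &blkcnt) == -1) {
 *			close(fd);
 *			return (-1);
 *		}
 *		printf("%llu bytes\n", (unsigned long long)(blkcnt * blksize));
 *		close(fd);
 *		return (0);
 *	}
 */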

/*
 * Vnode op for read
 */
int
spec_read(ap)
	struct vnop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		vfs_context_t a_context;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct buf *bp;
	daddr64_t bn, nextbn;
	long bsize, bscale;
	int devBlockSize = 0;
	int n, on;
	int error = 0;
	dev_t dev;

#if DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
		panic("spec_read proc");
#endif
	if (uio_resid(uio) == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		error = (*cdevsw[major(vp->v_rdev)].d_read)
			(vp->v_rdev, uio, ap->a_ioflag);
		return (error);

	case VBLK:
		if (uio->uio_offset < 0)
			return (EINVAL);

		dev = vp->v_rdev;

		devBlockSize = vp->v_specsize;

		if (devBlockSize > PAGE_SIZE)
			return (EINVAL);

		bscale = PAGE_SIZE / devBlockSize;
		bsize = bscale * devBlockSize;

		do {
			on = uio->uio_offset % bsize;

			bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ (bscale - 1));

			if (vp->v_speclastr + bscale == bn) {
				nextbn = bn + bscale;
				error = buf_breadn(vp, bn, (int)bsize, &nextbn,
					(int *)&bsize, 1, NOCRED, &bp);
			} else
				error = buf_bread(vp, bn, (int)bsize, NOCRED, &bp);

			vnode_lock(vp);
			vp->v_speclastr = bn;
			vnode_unlock(vp);

			n = bsize - buf_resid(bp);
			if ((on > n) || error) {
				if (!error)
					error = EINVAL;
				buf_brelse(bp);
				return (error);
			}
			// LP64todo - fix this!
			n = min((unsigned)(n - on), uio_resid(uio));

			error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
			if (n + on == bsize)
				buf_markaged(bp);
			buf_brelse(bp);
		} while (error == 0 && uio_resid(uio) > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */

	return (0);
}
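
/*
 * Illustrative sketch (not part of the original file): the offset
 * arithmetic used by the VBLK case above.  I/O is done in buffers of
 * bsize = bscale * devBlockSize bytes (one page), so a byte offset
 * splits into a buffer-aligned starting device block (bn) and a byte
 * offset within that buffer (on).  The sample numbers in the comment
 * are assumptions for illustration.
 *
 *	#include <assert.h>
 *
 *	static void
 *	decompose(long long offset, long devBlockSize, long pageSize)
 *	{
 *		long bscale = pageSize / devBlockSize;
 *		long bsize  = bscale * devBlockSize;
 *		long long on = offset % bsize;
 *		long long bn = (offset / devBlockSize) & ~(bscale - 1);
 *
 *		// offset 5000, 512-byte blocks, 4096-byte pages:
 *		// bsize = 4096, on = 904, bn = 8 (block 8 = byte 4096)
 *		assert(bn * devBlockSize + on == offset);
 *	}
 */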

/*
 * Vnode op for write
 */
int
spec_write(ap)
	struct vnop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		vfs_context_t a_context;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct buf *bp;
	daddr64_t bn;
	int bsize, blkmask, bscale;
	register int io_sync;
	register int io_size;
	int devBlockSize = 0;
	register int n, on;
	int error = 0;
	dev_t dev;

#if DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		error = (*cdevsw[major(vp->v_rdev)].d_write)
			(vp->v_rdev, uio, ap->a_ioflag);
		return (error);

	case VBLK:
		if (uio_resid(uio) == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);

		io_sync = (ap->a_ioflag & IO_SYNC);
		// LP64todo - fix this!
		io_size = uio_resid(uio);

		dev = (vp->v_rdev);

		devBlockSize = vp->v_specsize;
		if (devBlockSize > PAGE_SIZE)
			return (EINVAL);

		bscale = PAGE_SIZE / devBlockSize;
		blkmask = bscale - 1;
		bsize = bscale * devBlockSize;


		do {
			bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ blkmask);
			on = uio->uio_offset % bsize;

			// LP64todo - fix this!
			n = min((unsigned)(bsize - on), uio_resid(uio));

			/*
			 * Use buf_getblk() as an optimization IFF:
			 *
			 * 1)	We are writing exactly a block on a block
			 *	aligned boundary
			 * 2)	We know the size of the device from spec_open
			 * 3)	The write doesn't span the end of the device
			 *
			 * Otherwise, we fall back on buf_bread().
			 */
			if (n == bsize &&
			    vp->v_specdevsize != (u_int64_t)0 &&
			    (uio->uio_offset + (u_int64_t)n) > vp->v_specdevsize) {
				/* reduce the size of the write to what is there */
				n = (int)(vp->v_specdevsize - uio->uio_offset);
			}

			if (n == bsize)
				bp = buf_getblk(vp, bn, bsize, 0, 0, BLK_WRITE);
			else
				error = (int)buf_bread(vp, bn, bsize, NOCRED, &bp);

			/* Translate downstream error for upstream, if needed */
			if (!error)
				error = (int)buf_error(bp);
			if (error) {
				buf_brelse(bp);
				return (error);
			}
			n = min(n, bsize - buf_resid(bp));

			error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
			if (error) {
				buf_brelse(bp);
				return (error);
			}
			buf_markaged(bp);

			if (io_sync)
				error = buf_bwrite(bp);
			else {
				if ((n + on) == bsize)
					error = buf_bawrite(bp);
				else
					error = buf_bdwrite(bp);
			}
		} while (error == 0 && uio_resid(uio) > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */

	return (0);
}
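
/*
 * Illustrative sketch (not part of the original file): the write
 * completion policy chosen at the bottom of the VBLK loop above.
 * IO_SYNC forces buf_bwrite() (the caller waits for the I/O);
 * otherwise a buffer filled to its end is started immediately with
 * buf_bawrite(), while a partially filled buffer is held back with
 * buf_bdwrite() in case later writes complete it.  The helper name is
 * hypothetical.
 *
 *	static const char *
 *	write_policy(int io_sync, int n, int on, int bsize)
 *	{
 *		if (io_sync)
 *			return ("buf_bwrite: synchronous");
 *		if ((n + on) == bsize)
 *			return ("buf_bawrite: full buffer, write now");
 *		return ("buf_bdwrite: partial buffer, delay");
 *	}
 */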

/*
 * Device ioctl operation.
 */
int
spec_ioctl(ap)
	struct vnop_ioctl_args /* {
		struct vnode *a_vp;
		int  a_command;
		caddr_t  a_data;
		int  a_fflag;
		vfs_context_t a_context;
	} */ *ap;
{
	proc_t p = vfs_context_proc(ap->a_context);
	dev_t dev = ap->a_vp->v_rdev;

	switch (ap->a_vp->v_type) {

	case VCHR:
		return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, p));

	case VBLK:
		if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) {
			if (bdevsw[major(dev)].d_type == D_TAPE)
				return (0);
			else
				return (1);
		}
		return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, p));

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
	return (0);
}
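
/*
 * Illustrative sketch (not part of the original file): how an
 * in-kernel caller reaches the dispatch above.  spec_open() does
 * exactly this when it probes the block size: VNOP_IOCTL() on a VBLK
 * vnode lands in spec_ioctl(), which forwards to
 * bdevsw[major(dev)].d_ioctl.  The helper name is hypothetical.
 *
 *	static int
 *	query_blocksize(vnode_t devvp, u_int32_t *blksize, vfs_context_t ctx)
 *	{
 *		return (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
 *		    (caddr_t)blksize, 0, ctx));
 *	}
 */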

int
spec_select(ap)
	struct vnop_select_args /* {
		struct vnode *a_vp;
		int  a_which;
		int  a_fflags;
		void * a_wql;
		vfs_context_t a_context;
	} */ *ap;
{
	proc_t p = vfs_context_proc(ap->a_context);
	register dev_t dev;

	switch (ap->a_vp->v_type) {

	default:
		return (1);		/* XXX */

	case VCHR:
		dev = ap->a_vp->v_rdev;
		return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_wql, p);
	}
}

/*
 * Synch buffers associated with a block device
 */
int
spec_fsync_internal(vnode_t vp, int waitfor, __unused vfs_context_t context)
{
	if (vp->v_type == VCHR)
		return (0);
	/*
	 * Flush all dirty buffers associated with a block device.
	 */
	buf_flushdirtyblks(vp, waitfor == MNT_WAIT, 0, (char *)"spec_fsync");

	return (0);
}

int
spec_fsync(ap)
	struct vnop_fsync_args /* {
		struct vnode *a_vp;
		int  a_waitfor;
		vfs_context_t a_context;
	} */ *ap;
{
	return spec_fsync_internal(ap->a_vp, ap->a_waitfor, ap->a_context);
}

/*
 * Just call the device strategy routine
 */
extern int hard_throttle_on_root;


#define LOWPRI_DELAY_MSECS	 200
#define LOWPRI_WINDOW_MSECS	 200

int	lowpri_IO_window_msecs = LOWPRI_WINDOW_MSECS;
int	lowpri_IO_delay_msecs  = LOWPRI_DELAY_MSECS;

struct timeval last_normal_IO_timestamp;
struct timeval last_lowpri_IO_timestamp;
struct timeval lowpri_IO_window = { 0, LOWPRI_WINDOW_MSECS * 1000 };

int
spec_strategy(ap)
	struct vnop_strategy_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	buf_t	bp;
	int	bflags;
	dev_t	bdev;
	proc_t	p;
	struct timeval elapsed;

	bp = ap->a_bp;
	bdev = buf_device(bp);
	bflags = buf_flags(bp);

	if (kdebug_enable) {
		int code = 0;

		if (bflags & B_READ)
			code |= DKIO_READ;
		if (bflags & B_ASYNC)
			code |= DKIO_ASYNC;

		if (bflags & B_META)
			code |= DKIO_META;
		else if (bflags & B_PAGEIO)
			code |= DKIO_PAGING;

		KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
				      (unsigned int)bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
	}
	if (((bflags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
	    (buf_vnode(bp)->v_mount->mnt_kern_flag & MNTK_ROOTDEV))
		hard_throttle_on_root = 1;

	if (lowpri_IO_delay_msecs && lowpri_IO_window_msecs) {
		p = current_proc();

		if (p == NULL || !(p->p_lflag & P_LLOW_PRI_IO)) {
			/* recheck p: current_proc() may have returned NULL */
			if (p == NULL || !(p->p_lflag & P_LBACKGROUND_IO))
				microuptime(&last_normal_IO_timestamp);
		} else {
			microuptime(&last_lowpri_IO_timestamp);

			elapsed = last_lowpri_IO_timestamp;
			timevalsub(&elapsed, &last_normal_IO_timestamp);

			lowpri_IO_window.tv_sec  = lowpri_IO_window_msecs / 1000;
			lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000;

			if (timevalcmp(&elapsed, &lowpri_IO_window, <)) {
				struct uthread	*ut;

				/*
				 * I'd really like to do the IOSleep here, but
				 * we may be holding all kinds of filesystem related locks
				 * and the pages for this I/O marked 'busy'...
				 * we don't want to cause a normal task to block on
				 * one of these locks while we're throttling a task marked
				 * for low priority I/O... we'll mark the uthread and
				 * do the delay just before we return from the system
				 * call that triggered this I/O or from vnode_pagein
				 */
				ut = get_bsdthread_info(current_thread());
				ut->uu_lowpri_delay = lowpri_IO_delay_msecs;
			}
		}
	}
	(*bdevsw[major(bdev)].d_strategy)(bp);

	return (0);
}
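
/*
 * Illustrative sketch (not part of the original file): the throttling
 * window test applied above.  A thread marked for low priority I/O is
 * delayed only when a normal priority I/O was issued within the last
 * lowpri_IO_window_msecs, i.e. when the elapsed time between the two
 * timestamps still falls inside the window.  The helper name is
 * hypothetical; timevalsub()/timevalcmp() are the routines used above.
 *
 *	static int
 *	inside_lowpri_window(struct timeval last_normal, struct timeval now,
 *	    int window_msecs)
 *	{
 *		struct timeval elapsed = now;
 *		struct timeval window;
 *
 *		timevalsub(&elapsed, &last_normal);
 *		window.tv_sec  = window_msecs / 1000;
 *		window.tv_usec = (window_msecs % 1000) * 1000;
 *
 *		return (timevalcmp(&elapsed, &window, <));
 *	}
 */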


/*
 * Block mapping is not supported for special files,
 * so simply return ENOTSUP.
 */
int
spec_blockmap(__unused struct vnop_blockmap_args *ap)
{
	return (ENOTSUP);
}


/*
 * Device close routine
 */
int
spec_close(ap)
	struct vnop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		vfs_context_t a_context;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	dev_t dev = vp->v_rdev;
	int (*devclose)(dev_t, int, int, struct proc *);
	int mode, error;
	struct proc *p = vfs_context_proc(ap->a_context);

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.
		 * We cannot easily tell that a character device is
		 * a controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case,
		 * if the reference count is 2 (this last descriptor
		 * plus the session), release the reference from the session.
		 */
		if (vcount(vp) == 2 && p &&
		    vp == p->p_session->s_ttyvp) {
			p->p_session->s_ttyvp = NULL;
			vnode_rele(vp);
		}
		/*
		 * close on last reference.
		 */
		if (vcount(vp) > 1)
			return (0);
		devclose = cdevsw[major(dev)].d_close;
		mode = S_IFCHR;
		break;

	case VBLK:
#ifdef DEVFS_IMPLEMENTS_LOCKING
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context)))
			return (error);

		error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
		if (error)
			return (error);
		/*
		 * Since every use (buffer, vnode, swap, blockmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if (vcount(vp) > 1)
			return (0);
#else /* DEVFS_IMPLEMENTS_LOCKING */
		/*
		 * Since every use (buffer, vnode, swap, blockmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if (vcount(vp) > 1)
			return (0);

		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context)))
			return (error);

		error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
		if (error)
			return (error);
#endif /* DEVFS_IMPLEMENTS_LOCKING */
		devclose = bdevsw[major(dev)].d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	return ((*devclose)(dev, ap->a_fflag, mode, p));
}

/*
 * Return POSIX pathconf information applicable to special devices.
 */
int
spec_pathconf(ap)
	struct vnop_pathconf_args /* {
		struct vnode *a_vp;
		int  a_name;
		int *a_retval;
		vfs_context_t a_context;
	} */ *ap;
{

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_MAX_CANON:
		*ap->a_retval = MAX_CANON;
		return (0);
	case _PC_MAX_INPUT:
		*ap->a_retval = MAX_INPUT;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_VDISABLE:
		*ap->a_retval = _POSIX_VDISABLE;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}
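
/*
 * Illustrative sketch (not part of the original file): user space
 * reaches the switch above through pathconf(2) / fpathconf(2).  The
 * device path is an assumption for illustration.
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		long v = pathconf("/dev/tty", _PC_MAX_CANON);
 *
 *		if (v == -1)
 *			perror("pathconf");
 *		else
 *			printf("MAX_CANON = %ld\n", v);
 *		return (0);
 *	}
 */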

/*
 * Special device failed operation
 */
int
spec_ebadf(__unused void *dummy)
{

	return (EBADF);
}

/*
 * Special device bad operation
 */
int
spec_badop()
{

	panic("spec_badop called");
	/* NOTREACHED */
}

/* Blktooff derives file offset from logical block number */
int
spec_blktooff(ap)
	struct vnop_blktooff_args /* {
		struct vnode *a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VCHR:
		*ap->a_offset = (off_t)-1;	/* failure */
		return (ENOTSUP);

	case VBLK:
		printf("spec_blktooff: not implemented for VBLK\n");
		*ap->a_offset = (off_t)-1;	/* failure */
		return (ENOTSUP);

	default:
		panic("spec_blktooff type");
	}
	/* NOTREACHED */

	return (0);
}

/* Offtoblk derives logical block number from file offset */
int
spec_offtoblk(ap)
	struct vnop_offtoblk_args /* {
		struct vnode *a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VCHR:
		*ap->a_lblkno = (daddr64_t)-1;	/* failure */
		return (ENOTSUP);

	case VBLK:
		printf("spec_offtoblk: not implemented for VBLK\n");
		*ap->a_lblkno = (daddr64_t)-1;	/* failure */
		return (ENOTSUP);

	default:
		panic("spec_offtoblk type");
	}
	/* NOTREACHED */

	return (0);
}