/*
 * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the
 * License may not be used to create, or enable the creation or
 * redistribution of, unlawful or unlicensed copies of an Apple operating
 * system, or to circumvent, violate, or enable the circumvention or
 * violation of, any terms of an Apple operating system software license
 * agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spec_vnops.c	8.14 (Berkeley) 5/21/95
 */

#include <sys/param.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/buf_internal.h>
#include <sys/mount_internal.h>
#include <sys/namei.h>
#include <sys/vnode_internal.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/malloc.h>
#include <sys/disk.h>
#include <sys/uio_internal.h>
#include <miscfs/specfs/specdev.h>
#include <vfs/vfs_support.h>

#include <sys/kdebug.h>

struct vnode *speclisth[SPECHSZ];

/* symbolic sleep message strings for devices */
char	devopn[] = "devopn";
char	devio[] = "devio";
char	devwait[] = "devwait";
char	devin[] = "devin";
char	devout[] = "devout";
char	devioc[] = "devioc";
char	devcls[] = "devcls";

#define VOPFUNC int (*)(void *)

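/*
 * Template for the specfs vnode operations vector.  At VFS initialization
 * the entries below are compiled into the table that spec_vnodeop_p points
 * to; operations that make no sense on a device node (create, mkdir,
 * readdir, and so on) are wired to the generic err_* stubs.
 */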
int (**spec_vnodeop_p)(void *);
struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vnop_default_desc, (VOPFUNC)vn_default_error },
	{ &vnop_lookup_desc, (VOPFUNC)spec_lookup },		/* lookup */
	{ &vnop_create_desc, (VOPFUNC)err_create },		/* create */
	{ &vnop_mknod_desc, (VOPFUNC)err_mknod },		/* mknod */
	{ &vnop_open_desc, (VOPFUNC)spec_open },		/* open */
	{ &vnop_close_desc, (VOPFUNC)spec_close },		/* close */
	{ &vnop_access_desc, (VOPFUNC)spec_access },		/* access */
	{ &vnop_getattr_desc, (VOPFUNC)spec_getattr },		/* getattr */
	{ &vnop_setattr_desc, (VOPFUNC)spec_setattr },		/* setattr */
	{ &vnop_read_desc, (VOPFUNC)spec_read },		/* read */
	{ &vnop_write_desc, (VOPFUNC)spec_write },		/* write */
	{ &vnop_ioctl_desc, (VOPFUNC)spec_ioctl },		/* ioctl */
	{ &vnop_select_desc, (VOPFUNC)spec_select },		/* select */
	{ &vnop_revoke_desc, (VOPFUNC)nop_revoke },		/* revoke */
	{ &vnop_mmap_desc, (VOPFUNC)err_mmap },			/* mmap */
	{ &vnop_fsync_desc, (VOPFUNC)spec_fsync },		/* fsync */
	{ &vnop_remove_desc, (VOPFUNC)err_remove },		/* remove */
	{ &vnop_link_desc, (VOPFUNC)err_link },			/* link */
	{ &vnop_rename_desc, (VOPFUNC)err_rename },		/* rename */
	{ &vnop_mkdir_desc, (VOPFUNC)err_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (VOPFUNC)err_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (VOPFUNC)err_symlink },		/* symlink */
	{ &vnop_readdir_desc, (VOPFUNC)err_readdir },		/* readdir */
	{ &vnop_readlink_desc, (VOPFUNC)err_readlink },		/* readlink */
	{ &vnop_inactive_desc, (VOPFUNC)nop_inactive },		/* inactive */
	{ &vnop_reclaim_desc, (VOPFUNC)nop_reclaim },		/* reclaim */
	{ &vnop_strategy_desc, (VOPFUNC)spec_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (VOPFUNC)spec_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (VOPFUNC)err_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (VOPFUNC)spec_bwrite },		/* bwrite */
	{ &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize },	/* devblocksize */
	{ &vnop_pagein_desc, (VOPFUNC)err_pagein },		/* Pagein */
	{ &vnop_pageout_desc, (VOPFUNC)err_pageout },		/* Pageout */
	{ &vnop_copyfile_desc, (VOPFUNC)err_copyfile },		/* Copyfile */
	{ &vnop_blktooff_desc, (VOPFUNC)spec_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (VOPFUNC)spec_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (VOPFUNC)spec_blockmap },	/* blockmap */
	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
};
struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };


static void set_blocksize(vnode_t, dev_t);


/*
 * Trivial lookup routine that always fails.
 */
int
spec_lookup(ap)
	struct vnop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		vfs_context_t a_context;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

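/*
 * Derive the block size for a block device: if the driver supplies a
 * d_psize entry point and it returns a positive value, cache that in
 * v_specsize; otherwise fall back to DEV_BSIZE (512 bytes).
 */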
static void
set_blocksize(struct vnode *vp, dev_t dev)
{
	int (*size)(dev_t);
	int rsize;

	if ((major(dev) < nblkdev) && (size = bdevsw[major(dev)].d_psize)) {
		rsize = (*size)(dev);
		if (rsize <= 0)		/* did size fail? */
			vp->v_specsize = DEV_BSIZE;
		else
			vp->v_specsize = rsize;
	}
	else
		vp->v_specsize = DEV_BSIZE;
}

void
set_fsblocksize(struct vnode *vp)
{

	if (vp->v_type == VBLK) {
		dev_t dev = (dev_t)vp->v_rdev;
		int maj = major(dev);

		if ((u_int)maj >= (u_int)nblkdev)
			return;

		vnode_lock(vp);
		set_blocksize(vp, dev);
		vnode_unlock(vp);
	}

}


/*
 * Open a special file.
 */
int
spec_open(ap)
	struct vnop_open_args /* {
		struct vnode *a_vp;
		int  a_mode;
		vfs_context_t a_context;
	} */ *ap;
{
	struct proc *p = vfs_context_proc(ap->a_context);
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	struct vnode *vp = ap->a_vp;
	dev_t bdev, dev = (dev_t)vp->v_rdev;
	int maj = major(dev);
	int error;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (vp->v_type) {

	case VCHR:
		if ((u_int)maj >= (u_int)nchrdev)
			return (ENXIO);
		if (cred != FSCRED && (ap->a_mode & FWRITE)) {
			/*
			 * When running in very secure mode, do not allow
			 * opens for writing of any disk character devices.
			 */
			if (securelevel >= 2 && isdisk(dev, VCHR))
				return (EPERM);
			/*
			 * When running in secure mode, do not allow opens
			 * for writing of /dev/mem, /dev/kmem, or character
			 * devices whose corresponding block devices are
			 * currently mounted.
			 */
			if (securelevel >= 1) {
				if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error))
					return (error);
				if (iskmemdev(dev))
					return (EPERM);
			}
		}
		if (cdevsw[maj].d_type == D_TTY) {
			vnode_lock(vp);
			vp->v_flag |= VISTTY;
			vnode_unlock(vp);
		}
		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
		return (error);

	case VBLK:
		if ((u_int)maj >= (u_int)nblkdev)
			return (ENXIO);
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disk block devices.
		 */
		if (securelevel >= 2 && cred != FSCRED &&
		    (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
			return (EPERM);
		/*
		 * Do not allow opens of block devices that are
		 * currently mounted.
		 */
		if ( (error = vfs_mountedon(vp)) )
			return (error);
		error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p);
		if (!error) {
			u_int64_t blkcnt;
			u_int32_t blksize;
			int setsize = 0;
			u_int32_t size512 = 512;

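			/*
			 * Probe the media size: DKIOCGETBLOCKCOUNT reports a
			 * count in units of the device's current block size,
			 * so temporarily switch the device to 512-byte
			 * sectors, read the count, then restore the original
			 * block size.  blkcnt * 512 then gives the capacity
			 * in bytes.
			 */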
			if (!VNOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, ap->a_context)) {
				/* Switch to 512 byte sectors (temporarily) */

				if (!VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, ap->a_context)) {
					/* Get the number of 512 byte physical blocks. */
					if (!VNOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, ap->a_context)) {
						setsize = 1;
					}
				}
				/* If it doesn't set back, we can't recover */
				if (VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, ap->a_context))
					error = ENXIO;
			}


			vnode_lock(vp);
			set_blocksize(vp, dev);

			/*
			 * Cache the size in bytes of the block device for later
			 * use by spec_write().
			 */
			if (setsize)
				vp->v_specdevsize = blkcnt * (u_int64_t)size512;
			else
				vp->v_specdevsize = (u_int64_t)0;	/* Default: Can't get */

			vnode_unlock(vp);

		}
		return(error);
	default:
		panic("spec_open type");
	}
	return (0);
}

/*
 * Vnode op for read
 */
int
spec_read(ap)
	struct vnop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		vfs_context_t a_context;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct buf *bp;
	daddr64_t bn, nextbn;
	long bsize, bscale;
	int devBlockSize = 0;
	int n, on;
	int error = 0;
	dev_t dev;

#if DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
		panic("spec_read proc");
#endif
	if (uio_resid(uio) == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		error = (*cdevsw[major(vp->v_rdev)].d_read)
			(vp->v_rdev, uio, ap->a_ioflag);
		return (error);

	case VBLK:
		if (uio->uio_offset < 0)
			return (EINVAL);

		dev = vp->v_rdev;

		devBlockSize = vp->v_specsize;

		if (devBlockSize > PAGE_SIZE)
			return (EINVAL);

		bscale = PAGE_SIZE / devBlockSize;
		bsize = bscale * devBlockSize;

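		/*
		 * Transfer in page-sized chunks aligned to a multiple of the
		 * device block size (e.g. with 512-byte blocks and 4K pages,
		 * bscale is 8 and each pass moves up to 4K).  When this read
		 * directly follows the previous one on this vnode (tracked
		 * in v_speclastr), prime the next chunk with a read-ahead
		 * via buf_breadn().
		 */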
		do {
			on = uio->uio_offset % bsize;

			bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ (bscale - 1));

			if (vp->v_speclastr + bscale == bn) {
				nextbn = bn + bscale;
				error = buf_breadn(vp, bn, (int)bsize, &nextbn,
						(int *)&bsize, 1, NOCRED, &bp);
			} else
				error = buf_bread(vp, bn, (int)bsize, NOCRED, &bp);

			vnode_lock(vp);
			vp->v_speclastr = bn;
			vnode_unlock(vp);

			n = bsize - buf_resid(bp);
			if ((on > n) || error) {
				if (!error)
					error = EINVAL;
				buf_brelse(bp);
				return (error);
			}
			// LP64todo - fix this!
			n = min((unsigned)(n - on), uio_resid(uio));

			error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
			if (n + on == bsize)
				buf_markaged(bp);
			buf_brelse(bp);
		} while (error == 0 && uio_resid(uio) > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */

	return (0);
}

/*
 * Vnode op for write
 */
int
spec_write(ap)
	struct vnop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		vfs_context_t a_context;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct buf *bp;
	daddr64_t bn;
	int bsize, blkmask, bscale;
	register int io_sync;
	register int io_size;
	int devBlockSize = 0;
	register int n, on;
	int error = 0;
	dev_t dev;

#if DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		error = (*cdevsw[major(vp->v_rdev)].d_write)
			(vp->v_rdev, uio, ap->a_ioflag);
		return (error);

	case VBLK:
		if (uio_resid(uio) == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);

		io_sync = (ap->a_ioflag & IO_SYNC);
		// LP64todo - fix this!
		io_size = uio_resid(uio);

		dev = (vp->v_rdev);

		devBlockSize = vp->v_specsize;
		if (devBlockSize > PAGE_SIZE)
			return(EINVAL);

		bscale = PAGE_SIZE / devBlockSize;
		blkmask = bscale - 1;
		bsize = bscale * devBlockSize;


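		/*
		 * Write loop: a full, chunk-aligned write needs no prior
		 * read, so the buffer comes from buf_getblk(); a partial
		 * chunk is a read-modify-write through buf_bread().
		 * IO_SYNC writes go out synchronously via buf_bwrite();
		 * otherwise writes that reach the end of the chunk go out
		 * asynchronously (buf_bawrite) and the rest are delayed
		 * (buf_bdwrite).
		 */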
		do {
			bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ blkmask);
			on = uio->uio_offset % bsize;

			// LP64todo - fix this!
			n = min((unsigned)(bsize - on), uio_resid(uio));

			/*
			 * Use buf_getblk() as an optimization IFF:
			 *
			 * 1)	We are writing exactly one block on a block
			 *	aligned boundary
			 * 2)	We know the size of the device from spec_open
			 * 3)	The write doesn't span the end of the device
			 *
			 * Otherwise, we fall back on buf_bread().
			 */
			if (n == bsize &&
			    vp->v_specdevsize != (u_int64_t)0 &&
			    (uio->uio_offset + (u_int64_t)n) > vp->v_specdevsize) {
				/* reduce the size of the write to what is left on the device */
				n = (int)(vp->v_specdevsize - (u_int64_t)uio->uio_offset);
			}

			if (n == bsize)
				bp = buf_getblk(vp, bn, bsize, 0, 0, BLK_WRITE);
			else
				error = (int)buf_bread(vp, bn, bsize, NOCRED, &bp);

			/* Translate downstream error for upstream, if needed */
			if (!error)
				error = (int)buf_error(bp);
			if (error) {
				buf_brelse(bp);
				return (error);
			}
			n = min(n, bsize - buf_resid(bp));

			error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
			if (error) {
				buf_brelse(bp);
				return (error);
			}
			buf_markaged(bp);

			if (io_sync)
				error = buf_bwrite(bp);
			else {
				if ((n + on) == bsize)
					error = buf_bawrite(bp);
				else
					error = buf_bdwrite(bp);
			}
		} while (error == 0 && uio_resid(uio) > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */

	return (0);
}

/*
 * Device ioctl operation.
 */
int
spec_ioctl(ap)
	struct vnop_ioctl_args /* {
		struct vnode *a_vp;
		int  a_command;
		caddr_t  a_data;
		int  a_fflag;
		vfs_context_t a_context;
	} */ *ap;
{
	proc_t p = vfs_context_proc(ap->a_context);
	dev_t dev = ap->a_vp->v_rdev;

	switch (ap->a_vp->v_type) {

	case VCHR:
		return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, p));

	case VBLK:
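		/*
		 * Legacy probe (intent inferred from the check below, not
		 * documented here): an ioctl command of 0 with B_TAPE as the
		 * data argument asks whether this block device is a tape
		 * drive; the return is 0 for a tape, 1 otherwise.
		 */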
		if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) {
			if (bdevsw[major(dev)].d_type == D_TAPE)
				return (0);
			else
				return (1);
		}
		return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
		    ap->a_fflag, p));

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
	return (0);
}

int
spec_select(ap)
	struct vnop_select_args /* {
		struct vnode *a_vp;
		int  a_which;
		int  a_fflags;
		void * a_wql;
		vfs_context_t a_context;
	} */ *ap;
{
	proc_t p = vfs_context_proc(ap->a_context);
	register dev_t dev;

	switch (ap->a_vp->v_type) {

	default:
		return (1);		/* XXX */

	case VCHR:
		dev = ap->a_vp->v_rdev;
		return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_wql, p);
	}
}

/*
 * Synch buffers associated with a block device
 */
int
spec_fsync_internal(vnode_t vp, int waitfor, __unused vfs_context_t context)
{
	if (vp->v_type == VCHR)
		return (0);
	/*
	 * Flush all dirty buffers associated with a block device.
	 */
	buf_flushdirtyblks(vp, waitfor == MNT_WAIT, 0, (char *)"spec_fsync");

	return (0);
}

int
spec_fsync(ap)
	struct vnop_fsync_args /* {
		struct vnode *a_vp;
		int  a_waitfor;
		vfs_context_t a_context;
	} */ *ap;
{
	return spec_fsync_internal(ap->a_vp, ap->a_waitfor, ap->a_context);
}

/*
 * Just call the device strategy routine
 */
extern int hard_throttle_on_root;


#define LOWPRI_DELAY_MSECS	200
#define LOWPRI_WINDOW_MSECS	200

int	lowpri_IO_window_msecs = LOWPRI_WINDOW_MSECS;
int	lowpri_IO_delay_msecs  = LOWPRI_DELAY_MSECS;

struct timeval last_normal_IO_timestamp;
struct timeval last_lowpri_IO_timestamp;
struct timeval lowpri_IO_window = { 0, LOWPRI_WINDOW_MSECS * 1000 };

int
spec_strategy(ap)
	struct vnop_strategy_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	buf_t	bp;
	int	bflags;
	dev_t	bdev;
	proc_t	p;
	struct timeval elapsed;

	bp = ap->a_bp;
	bdev = buf_device(bp);
	bflags = buf_flags(bp);

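	/*
	 * Trace the transfer through the kdebug facility, encoding in the
	 * DKIO code whether it is a read, asynchronous, metadata, or
	 * paging I/O.
	 */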
	if (kdebug_enable) {
		int code = 0;

		if (bflags & B_READ)
			code |= DKIO_READ;
		if (bflags & B_ASYNC)
			code |= DKIO_ASYNC;

		if (bflags & B_META)
			code |= DKIO_META;
		else if (bflags & B_PAGEIO)
			code |= DKIO_PAGING;

		KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
				      (unsigned int)bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
	}
	if (((bflags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
	    (buf_vnode(bp)->v_mount->mnt_kern_flag & MNTK_ROOTDEV))
		hard_throttle_on_root = 1;

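	/*
	 * Low-priority I/O throttling: record a timestamp for each normal
	 * I/O, and when an I/O from a process marked for low-priority I/O
	 * arrives within lowpri_IO_window_msecs of the last normal one,
	 * mark the issuing uthread so that it will delay for
	 * lowpri_IO_delay_msecs before returning to user space.
	 */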
	if ( lowpri_IO_delay_msecs && lowpri_IO_window_msecs ) {
		p = current_proc();

		if ( (p == NULL) || !(p->p_lflag & P_LLOW_PRI_IO)) {
			if ((p == NULL) || !(p->p_lflag & P_LBACKGROUND_IO))
				microuptime(&last_normal_IO_timestamp);
		} else {
			microuptime(&last_lowpri_IO_timestamp);

			elapsed = last_lowpri_IO_timestamp;
			timevalsub(&elapsed, &last_normal_IO_timestamp);

			lowpri_IO_window.tv_sec  = lowpri_IO_window_msecs / 1000;
			lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000;

			if (timevalcmp(&elapsed, &lowpri_IO_window, <)) {
				struct uthread	*ut;

				/*
				 * I'd really like to do the IOSleep here, but
				 * we may be holding all kinds of filesystem related locks
				 * and the pages for this I/O marked 'busy'...
				 * we don't want to cause a normal task to block on
				 * one of these locks while we're throttling a task marked
				 * for low priority I/O... we'll mark the uthread and
				 * do the delay just before we return from the system
				 * call that triggered this I/O or from vnode_pagein
				 */
				ut = get_bsdthread_info(current_thread());
				ut->uu_lowpri_delay = lowpri_IO_delay_msecs;
			}
		}
	}
	(*bdevsw[major(bdev)].d_strategy)(bp);

	return (0);
}

/*
 * Block mapping is not supported on special files; fail the operation.
 */
int
spec_blockmap(__unused struct vnop_blockmap_args *ap)
{
	return (ENOTSUP);
}


/*
 * Device close routine
 */
int
spec_close(ap)
	struct vnop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		vfs_context_t a_context;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	dev_t dev = vp->v_rdev;
	int (*devclose)(dev_t, int, int, struct proc *);
	int mode, error;
	struct proc *p = vfs_context_proc(ap->a_context);

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.
		 * We cannot easily tell that a character device is
		 * a controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case,
		 * if the reference count is 2 (this last descriptor
		 * plus the session), release the reference from the session.
		 */
		if (vcount(vp) == 2 && p &&
		    vp == p->p_session->s_ttyvp) {
			p->p_session->s_ttyvp = NULL;
			vnode_rele(vp);
		}
		/*
		 * close on last reference.
		 */
		if (vcount(vp) > 1)
			return (0);
		devclose = cdevsw[major(dev)].d_close;
		mode = S_IFCHR;
		break;

	case VBLK:
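		/*
		 * The two variants below differ only in ordering: when devfs
		 * implements its own locking, the flush and invalidate happen
		 * before the last-close check; otherwise the last-close check
		 * is made first.
		 */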
#ifdef DEVFS_IMPLEMENTS_LOCKING
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context)))
			return (error);

		error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
		if (error)
			return (error);
		/*
		 * Since every use (buffer, vnode, swap, blockmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if (vcount(vp) > 1)
			return (0);
#else /* DEVFS_IMPLEMENTS_LOCKING */
		/*
		 * Since every use (buffer, vnode, swap, blockmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if (vcount(vp) > 1)
			return (0);

		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context)))
			return (error);

		error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
		if (error)
			return (error);
#endif /* DEVFS_IMPLEMENTS_LOCKING */
		devclose = bdevsw[major(dev)].d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	return ((*devclose)(dev, ap->a_fflag, mode, p));
}

/*
 * Return POSIX pathconf information applicable to special devices.
 */
int
spec_pathconf(ap)
	struct vnop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
		vfs_context_t a_context;
	} */ *ap;
{

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_MAX_CANON:
		*ap->a_retval = MAX_CANON;
		return (0);
	case _PC_MAX_INPUT:
		*ap->a_retval = MAX_INPUT;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_VDISABLE:
		*ap->a_retval = _POSIX_VDISABLE;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

int
spec_devblocksize(ap)
	struct vnop_devblocksize_args /* {
		struct vnode *a_vp;
		int *a_retval;
	} */ *ap;
{
	*ap->a_retval = (ap->a_vp->v_specsize);
	return (0);
}

/*
 * Special device failed operation
 */
int
spec_ebadf(__unused void *dummy)
{

	return (EBADF);
}

/*
 * Special device bad operation
 */
int
spec_badop()
{

	panic("spec_badop called");
	/* NOTREACHED */
}

/* Blktooff derives file offset from logical block number */
int
spec_blktooff(ap)
	struct vnop_blktooff_args /* {
		struct vnode *a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VCHR:
		*ap->a_offset = (off_t)-1;	/* failure */
		return (ENOTSUP);

	case VBLK:
		printf("spec_blktooff: not implemented for VBLK\n");
		*ap->a_offset = (off_t)-1;	/* failure */
		return (ENOTSUP);

	default:
		panic("spec_blktooff type");
	}
	/* NOTREACHED */

	return (0);
}

/* Offtoblk derives logical block number from file offset */
int
spec_offtoblk(ap)
	struct vnop_offtoblk_args /* {
		struct vnode *a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;

	switch (vp->v_type) {
	case VCHR:
		*ap->a_lblkno = (daddr64_t)-1;	/* failure */
		return (ENOTSUP);

	case VBLK:
		printf("spec_offtoblk: not implemented for VBLK\n");
		*ap->a_lblkno = (daddr64_t)-1;	/* failure */
		return (ENOTSUP);

	default:
		panic("spec_offtoblk type");
	}
	/* NOTREACHED */

	return (0);
}