]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
91447636 | 2 | * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. |
1c79356b A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
ff6e181a A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
1c79356b | 12 | * |
ff6e181a A |
13 | * The Original Code and all software distributed under the License are |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
1c79356b A |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
ff6e181a A |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
1c79356b A |
20 | * |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
23 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
24 | /* | |
25 | * Copyright (c) 1989, 1993, 1995 | |
26 | * The Regents of the University of California. All rights reserved. | |
27 | * | |
28 | * Redistribution and use in source and binary forms, with or without | |
29 | * modification, are permitted provided that the following conditions | |
30 | * are met: | |
31 | * 1. Redistributions of source code must retain the above copyright | |
32 | * notice, this list of conditions and the following disclaimer. | |
33 | * 2. Redistributions in binary form must reproduce the above copyright | |
34 | * notice, this list of conditions and the following disclaimer in the | |
35 | * documentation and/or other materials provided with the distribution. | |
36 | * 3. All advertising materials mentioning features or use of this software | |
37 | * must display the following acknowledgement: | |
38 | * This product includes software developed by the University of | |
39 | * California, Berkeley and its contributors. | |
40 | * 4. Neither the name of the University nor the names of its contributors | |
41 | * may be used to endorse or promote products derived from this software | |
42 | * without specific prior written permission. | |
43 | * | |
44 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
45 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
46 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
47 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
48 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
49 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
50 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
51 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
52 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
53 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
54 | * SUCH DAMAGE. | |
55 | * | |
56 | * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 | |
57 | */ | |
58 | ||
59 | #include <sys/param.h> | |
91447636 A |
60 | #include <sys/proc_internal.h> |
61 | #include <sys/kauth.h> | |
1c79356b A |
62 | #include <sys/systm.h> |
63 | #include <sys/kernel.h> | |
64 | #include <sys/conf.h> | |
91447636 A |
65 | #include <sys/buf_internal.h> |
66 | #include <sys/mount_internal.h> | |
1c79356b | 67 | #include <sys/namei.h> |
91447636 | 68 | #include <sys/vnode_internal.h> |
1c79356b A |
69 | #include <sys/stat.h> |
70 | #include <sys/errno.h> | |
71 | #include <sys/ioctl.h> | |
72 | #include <sys/file.h> | |
91447636 | 73 | #include <sys/user.h> |
1c79356b | 74 | #include <sys/malloc.h> |
55e303ae | 75 | #include <sys/disk.h> |
91447636 | 76 | #include <sys/uio_internal.h> |
1c79356b A |
77 | #include <miscfs/specfs/specdev.h> |
78 | #include <vfs/vfs_support.h> | |
79 | ||
9bccf70c | 80 | #include <sys/kdebug.h> |
1c79356b A |
81 | |
82 | struct vnode *speclisth[SPECHSZ]; | |
83 | ||
84 | /* symbolic sleep message strings for devices */ | |
85 | char devopn[] = "devopn"; | |
86 | char devio[] = "devio"; | |
87 | char devwait[] = "devwait"; | |
88 | char devin[] = "devin"; | |
89 | char devout[] = "devout"; | |
90 | char devioc[] = "devioc"; | |
91 | char devcls[] = "devcls"; | |
92 | ||
93 | #define VOPFUNC int (*)(void *) | |
94 | ||
95 | int (**spec_vnodeop_p)(void *); | |
96 | struct vnodeopv_entry_desc spec_vnodeop_entries[] = { | |
91447636 A |
97 | { &vnop_default_desc, (VOPFUNC)vn_default_error }, |
98 | { &vnop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ | |
99 | { &vnop_create_desc, (VOPFUNC)err_create }, /* create */ | |
100 | { &vnop_mknod_desc, (VOPFUNC)err_mknod }, /* mknod */ | |
101 | { &vnop_open_desc, (VOPFUNC)spec_open }, /* open */ | |
102 | { &vnop_close_desc, (VOPFUNC)spec_close }, /* close */ | |
103 | { &vnop_access_desc, (VOPFUNC)spec_access }, /* access */ | |
104 | { &vnop_getattr_desc, (VOPFUNC)spec_getattr }, /* getattr */ | |
105 | { &vnop_setattr_desc, (VOPFUNC)spec_setattr }, /* setattr */ | |
106 | { &vnop_read_desc, (VOPFUNC)spec_read }, /* read */ | |
107 | { &vnop_write_desc, (VOPFUNC)spec_write }, /* write */ | |
108 | { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ | |
109 | { &vnop_select_desc, (VOPFUNC)spec_select }, /* select */ | |
110 | { &vnop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ | |
111 | { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ | |
112 | { &vnop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ | |
113 | { &vnop_remove_desc, (VOPFUNC)err_remove }, /* remove */ | |
114 | { &vnop_link_desc, (VOPFUNC)err_link }, /* link */ | |
115 | { &vnop_rename_desc, (VOPFUNC)err_rename }, /* rename */ | |
116 | { &vnop_mkdir_desc, (VOPFUNC)err_mkdir }, /* mkdir */ | |
117 | { &vnop_rmdir_desc, (VOPFUNC)err_rmdir }, /* rmdir */ | |
118 | { &vnop_symlink_desc, (VOPFUNC)err_symlink }, /* symlink */ | |
119 | { &vnop_readdir_desc, (VOPFUNC)err_readdir }, /* readdir */ | |
120 | { &vnop_readlink_desc, (VOPFUNC)err_readlink }, /* readlink */ | |
121 | { &vnop_inactive_desc, (VOPFUNC)nop_inactive }, /* inactive */ | |
122 | { &vnop_reclaim_desc, (VOPFUNC)nop_reclaim }, /* reclaim */ | |
123 | { &vnop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ | |
124 | { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ | |
125 | { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ | |
126 | { &vnop_bwrite_desc, (VOPFUNC)spec_bwrite }, /* bwrite */ | |
127 | { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ | |
128 | { &vnop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ | |
129 | { &vnop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ | |
130 | { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ | |
131 | { &vnop_blktooff_desc, (VOPFUNC)spec_blktooff }, /* blktooff */ | |
132 | { &vnop_offtoblk_desc, (VOPFUNC)spec_offtoblk }, /* offtoblk */ | |
133 | { &vnop_blockmap_desc, (VOPFUNC)spec_blockmap }, /* blockmap */ | |
1c79356b A |
134 | { (struct vnodeop_desc*)NULL, (int(*)())NULL } |
135 | }; | |
136 | struct vnodeopv_desc spec_vnodeop_opv_desc = | |
137 | { &spec_vnodeop_p, spec_vnodeop_entries }; | |
138 | ||
91447636 A |
139 | |
140 | static void set_blocksize(vnode_t, dev_t); | |
141 | ||
142 | ||
1c79356b A |
143 | /* |
144 | * Trivial lookup routine that always fails. | |
145 | */ | |
146 | int | |
147 | spec_lookup(ap) | |
91447636 | 148 | struct vnop_lookup_args /* { |
1c79356b A |
149 | struct vnode *a_dvp; |
150 | struct vnode **a_vpp; | |
151 | struct componentname *a_cnp; | |
91447636 | 152 | vfs_context_t a_context; |
1c79356b A |
153 | } */ *ap; |
154 | { | |
155 | ||
156 | *ap->a_vpp = NULL; | |
157 | return (ENOTDIR); | |
158 | } | |
159 | ||
91447636 | 160 | static void |
1c79356b A |
161 | set_blocksize(struct vnode *vp, dev_t dev) |
162 | { | |
91447636 | 163 | int (*size)(dev_t); |
1c79356b A |
164 | int rsize; |
165 | ||
166 | if ((major(dev) < nblkdev) && (size = bdevsw[major(dev)].d_psize)) { | |
167 | rsize = (*size)(dev); | |
168 | if (rsize <= 0) /* did size fail? */ | |
169 | vp->v_specsize = DEV_BSIZE; | |
170 | else | |
171 | vp->v_specsize = rsize; | |
172 | } | |
173 | else | |
174 | vp->v_specsize = DEV_BSIZE; | |
175 | } | |
176 | ||
177 | void | |
178 | set_fsblocksize(struct vnode *vp) | |
179 | { | |
180 | ||
181 | if (vp->v_type == VBLK) { | |
182 | dev_t dev = (dev_t)vp->v_rdev; | |
183 | int maj = major(dev); | |
184 | ||
91447636 | 185 | if ((u_int)maj >= (u_int)nblkdev) |
1c79356b A |
186 | return; |
187 | ||
91447636 | 188 | vnode_lock(vp); |
1c79356b | 189 | set_blocksize(vp, dev); |
91447636 | 190 | vnode_unlock(vp); |
1c79356b A |
191 | } |
192 | ||
193 | } | |
194 | ||
195 | ||
196 | /* | |
197 | * Open a special file. | |
198 | */ | |
91447636 | 199 | int |
1c79356b | 200 | spec_open(ap) |
91447636 | 201 | struct vnop_open_args /* { |
1c79356b A |
202 | struct vnode *a_vp; |
203 | int a_mode; | |
91447636 | 204 | vfs_context_t a_context; |
1c79356b A |
205 | } */ *ap; |
206 | { | |
91447636 A |
207 | struct proc *p = vfs_context_proc(ap->a_context); |
208 | kauth_cred_t cred = vfs_context_ucred(ap->a_context); | |
209 | struct vnode *vp = ap->a_vp; | |
1c79356b A |
210 | dev_t bdev, dev = (dev_t)vp->v_rdev; |
211 | int maj = major(dev); | |
212 | int error; | |
213 | ||
214 | /* | |
215 | * Don't allow open if fs is mounted -nodev. | |
216 | */ | |
217 | if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) | |
218 | return (ENXIO); | |
219 | ||
220 | switch (vp->v_type) { | |
221 | ||
222 | case VCHR: | |
91447636 | 223 | if ((u_int)maj >= (u_int)nchrdev) |
1c79356b | 224 | return (ENXIO); |
91447636 | 225 | if (cred != FSCRED && (ap->a_mode & FWRITE)) { |
1c79356b A |
226 | /* |
227 | * When running in very secure mode, do not allow | |
228 | * opens for writing of any disk character devices. | |
229 | */ | |
230 | if (securelevel >= 2 && isdisk(dev, VCHR)) | |
231 | return (EPERM); | |
232 | /* | |
233 | * When running in secure mode, do not allow opens | |
234 | * for writing of /dev/mem, /dev/kmem, or character | |
235 | * devices whose corresponding block devices are | |
236 | * currently mounted. | |
237 | */ | |
238 | if (securelevel >= 1) { | |
91447636 | 239 | if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error)) |
1c79356b A |
240 | return (error); |
241 | if (iskmemdev(dev)) | |
242 | return (EPERM); | |
243 | } | |
244 | } | |
91447636 A |
245 | if (cdevsw[maj].d_type == D_TTY) { |
246 | vnode_lock(vp); | |
1c79356b | 247 | vp->v_flag |= VISTTY; |
91447636 A |
248 | vnode_unlock(vp); |
249 | } | |
1c79356b | 250 | error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p); |
1c79356b A |
251 | return (error); |
252 | ||
253 | case VBLK: | |
91447636 | 254 | if ((u_int)maj >= (u_int)nblkdev) |
1c79356b A |
255 | return (ENXIO); |
256 | /* | |
257 | * When running in very secure mode, do not allow | |
258 | * opens for writing of any disk block devices. | |
259 | */ | |
91447636 | 260 | if (securelevel >= 2 && cred != FSCRED && |
1c79356b A |
261 | (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK) |
262 | return (EPERM); | |
263 | /* | |
264 | * Do not allow opens of block devices that are | |
265 | * currently mounted. | |
266 | */ | |
91447636 | 267 | if ( (error = vfs_mountedon(vp)) ) |
1c79356b A |
268 | return (error); |
269 | error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p); | |
270 | if (!error) { | |
55e303ae A |
271 | u_int64_t blkcnt; |
272 | u_int32_t blksize; | |
91447636 A |
273 | int setsize = 0; |
274 | u_int32_t size512 = 512; | |
275 | ||
276 | ||
277 | if (!VNOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, ap->a_context)) { | |
278 | /* Switch to 512 byte sectors (temporarily) */ | |
55e303ae | 279 | |
91447636 A |
280 | if (!VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, ap->a_context)) { |
281 | /* Get the number of 512 byte physical blocks. */ | |
282 | if (!VNOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, ap->a_context)) { | |
283 | setsize = 1; | |
284 | } | |
285 | } | |
286 | /* If it doesn't set back, we can't recover */ | |
287 | if (VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, ap->a_context)) | |
288 | error = ENXIO; | |
289 | } | |
290 | ||
291 | ||
292 | vnode_lock(vp); | |
1c79356b | 293 | set_blocksize(vp, dev); |
55e303ae A |
294 | |
295 | /* | |
296 | * Cache the size in bytes of the block device for later | |
297 | * use by spec_write(). | |
298 | */ | |
91447636 | 299 | if (setsize) |
55e303ae | 300 | vp->v_specdevsize = blkcnt * (u_int64_t)size512; |
91447636 A |
301 | else |
302 | vp->v_specdevsize = (u_int64_t)0; /* Default: Can't get */ | |
303 | ||
304 | vnode_unlock(vp); | |
305 | ||
1c79356b A |
306 | } |
307 | return(error); | |
91447636 A |
308 | default: |
309 | panic("spec_open type"); | |
1c79356b A |
310 | } |
311 | return (0); | |
312 | } | |
313 | ||
314 | /* | |
315 | * Vnode op for read | |
316 | */ | |
91447636 | 317 | int |
1c79356b | 318 | spec_read(ap) |
91447636 | 319 | struct vnop_read_args /* { |
1c79356b A |
320 | struct vnode *a_vp; |
321 | struct uio *a_uio; | |
322 | int a_ioflag; | |
91447636 | 323 | vfs_context_t a_context; |
1c79356b A |
324 | } */ *ap; |
325 | { | |
326 | register struct vnode *vp = ap->a_vp; | |
327 | register struct uio *uio = ap->a_uio; | |
1c79356b | 328 | struct buf *bp; |
91447636 | 329 | daddr64_t bn, nextbn; |
1c79356b A |
330 | long bsize, bscale; |
331 | int devBlockSize=0; | |
91447636 | 332 | int n, on; |
1c79356b A |
333 | int error = 0; |
334 | dev_t dev; | |
335 | ||
336 | #if DIAGNOSTIC | |
337 | if (uio->uio_rw != UIO_READ) | |
338 | panic("spec_read mode"); | |
91447636 | 339 | if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) |
1c79356b A |
340 | panic("spec_read proc"); |
341 | #endif | |
91447636 | 342 | if (uio_resid(uio) == 0) |
1c79356b A |
343 | return (0); |
344 | ||
345 | switch (vp->v_type) { | |
346 | ||
347 | case VCHR: | |
1c79356b A |
348 | error = (*cdevsw[major(vp->v_rdev)].d_read) |
349 | (vp->v_rdev, uio, ap->a_ioflag); | |
1c79356b A |
350 | return (error); |
351 | ||
352 | case VBLK: | |
353 | if (uio->uio_offset < 0) | |
354 | return (EINVAL); | |
355 | ||
356 | dev = vp->v_rdev; | |
357 | ||
358 | devBlockSize = vp->v_specsize; | |
359 | ||
360 | if (devBlockSize > PAGE_SIZE) | |
361 | return (EINVAL); | |
362 | ||
363 | bscale = PAGE_SIZE / devBlockSize; | |
364 | bsize = bscale * devBlockSize; | |
365 | ||
366 | do { | |
367 | on = uio->uio_offset % bsize; | |
368 | ||
91447636 | 369 | bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ (bscale - 1)); |
1c79356b | 370 | |
91447636 | 371 | if (vp->v_speclastr + bscale == bn) { |
1c79356b | 372 | nextbn = bn + bscale; |
91447636 | 373 | error = buf_breadn(vp, bn, (int)bsize, &nextbn, |
1c79356b A |
374 | (int *)&bsize, 1, NOCRED, &bp); |
375 | } else | |
91447636 A |
376 | error = buf_bread(vp, bn, (int)bsize, NOCRED, &bp); |
377 | ||
378 | vnode_lock(vp); | |
379 | vp->v_speclastr = bn; | |
380 | vnode_unlock(vp); | |
1c79356b | 381 | |
91447636 | 382 | n = bsize - buf_resid(bp); |
1c79356b A |
383 | if ((on > n) || error) { |
384 | if (!error) | |
385 | error = EINVAL; | |
91447636 | 386 | buf_brelse(bp); |
1c79356b A |
387 | return (error); |
388 | } | |
91447636 A |
389 | // LP64todo - fix this! |
390 | n = min((unsigned)(n - on), uio_resid(uio)); | |
1c79356b | 391 | |
91447636 | 392 | error = uiomove((char *)buf_dataptr(bp) + on, n, uio); |
1c79356b | 393 | if (n + on == bsize) |
91447636 A |
394 | buf_markaged(bp); |
395 | buf_brelse(bp); | |
396 | } while (error == 0 && uio_resid(uio) > 0 && n != 0); | |
1c79356b A |
397 | return (error); |
398 | ||
399 | default: | |
400 | panic("spec_read type"); | |
401 | } | |
402 | /* NOTREACHED */ | |
91447636 A |
403 | |
404 | return (0); | |
1c79356b A |
405 | } |
406 | ||
407 | /* | |
408 | * Vnode op for write | |
409 | */ | |
91447636 | 410 | int |
1c79356b | 411 | spec_write(ap) |
91447636 | 412 | struct vnop_write_args /* { |
1c79356b A |
413 | struct vnode *a_vp; |
414 | struct uio *a_uio; | |
415 | int a_ioflag; | |
91447636 | 416 | vfs_context_t a_context; |
1c79356b A |
417 | } */ *ap; |
418 | { | |
419 | register struct vnode *vp = ap->a_vp; | |
420 | register struct uio *uio = ap->a_uio; | |
1c79356b | 421 | struct buf *bp; |
91447636 | 422 | daddr64_t bn; |
1c79356b A |
423 | int bsize, blkmask, bscale; |
424 | register int io_sync; | |
425 | register int io_size; | |
426 | int devBlockSize=0; | |
427 | register int n, on; | |
428 | int error = 0; | |
429 | dev_t dev; | |
430 | ||
431 | #if DIAGNOSTIC | |
432 | if (uio->uio_rw != UIO_WRITE) | |
433 | panic("spec_write mode"); | |
91447636 | 434 | if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) |
1c79356b A |
435 | panic("spec_write proc"); |
436 | #endif | |
437 | ||
438 | switch (vp->v_type) { | |
439 | ||
440 | case VCHR: | |
1c79356b A |
441 | error = (*cdevsw[major(vp->v_rdev)].d_write) |
442 | (vp->v_rdev, uio, ap->a_ioflag); | |
1c79356b A |
443 | return (error); |
444 | ||
445 | case VBLK: | |
91447636 | 446 | if (uio_resid(uio) == 0) |
1c79356b A |
447 | return (0); |
448 | if (uio->uio_offset < 0) | |
449 | return (EINVAL); | |
450 | ||
451 | io_sync = (ap->a_ioflag & IO_SYNC); | |
91447636 A |
452 | // LP64todo - fix this! |
453 | io_size = uio_resid(uio); | |
1c79356b A |
454 | |
455 | dev = (vp->v_rdev); | |
456 | ||
457 | devBlockSize = vp->v_specsize; | |
458 | if (devBlockSize > PAGE_SIZE) | |
459 | return(EINVAL); | |
460 | ||
461 | bscale = PAGE_SIZE / devBlockSize; | |
462 | blkmask = bscale - 1; | |
463 | bsize = bscale * devBlockSize; | |
464 | ||
465 | ||
466 | do { | |
91447636 | 467 | bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ blkmask); |
1c79356b A |
468 | on = uio->uio_offset % bsize; |
469 | ||
91447636 A |
470 | // LP64todo - fix this! |
471 | n = min((unsigned)(bsize - on), uio_resid(uio)); | |
1c79356b | 472 | |
55e303ae | 473 | /* |
91447636 | 474 | * Use buf_getblk() as an optimization IFF: |
55e303ae A |
475 | * |
476 | * 1) We are reading exactly a block on a block | |
477 | * aligned boundary | |
478 | * 2) We know the size of the device from spec_open | |
479 | * 3) The read doesn't span the end of the device | |
480 | * | |
91447636 | 481 | * Otherwise, we fall back on buf_bread(). |
55e303ae A |
482 | */ |
483 | if (n == bsize && | |
484 | vp->v_specdevsize != (u_int64_t)0 && | |
485 | (uio->uio_offset + (u_int64_t)n) > vp->v_specdevsize) { | |
486 | /* reduce the size of the read to what is there */ | |
487 | n = (uio->uio_offset + (u_int64_t)n) - vp->v_specdevsize; | |
488 | } | |
489 | ||
1c79356b | 490 | if (n == bsize) |
91447636 | 491 | bp = buf_getblk(vp, bn, bsize, 0, 0, BLK_WRITE); |
1c79356b | 492 | else |
91447636 | 493 | error = (int)buf_bread(vp, bn, bsize, NOCRED, &bp); |
1c79356b | 494 | |
55e303ae | 495 | /* Translate downstream error for upstream, if needed */ |
91447636 A |
496 | if (!error) |
497 | error = (int)buf_error(bp); | |
1c79356b | 498 | if (error) { |
91447636 | 499 | buf_brelse(bp); |
1c79356b A |
500 | return (error); |
501 | } | |
91447636 | 502 | n = min(n, bsize - buf_resid(bp)); |
1c79356b | 503 | |
91447636 A |
504 | error = uiomove((char *)buf_dataptr(bp) + on, n, uio); |
505 | if (error) { | |
506 | buf_brelse(bp); | |
507 | return (error); | |
508 | } | |
509 | buf_markaged(bp); | |
1c79356b A |
510 | |
511 | if (io_sync) | |
91447636 | 512 | error = buf_bwrite(bp); |
1c79356b A |
513 | else { |
514 | if ((n + on) == bsize) | |
91447636 | 515 | error = buf_bawrite(bp); |
1c79356b | 516 | else |
91447636 | 517 | error = buf_bdwrite(bp); |
1c79356b | 518 | } |
91447636 | 519 | } while (error == 0 && uio_resid(uio) > 0 && n != 0); |
1c79356b A |
520 | return (error); |
521 | ||
522 | default: | |
523 | panic("spec_write type"); | |
524 | } | |
525 | /* NOTREACHED */ | |
91447636 A |
526 | |
527 | return (0); | |
1c79356b A |
528 | } |
529 | ||
530 | /* | |
531 | * Device ioctl operation. | |
532 | */ | |
91447636 | 533 | int |
1c79356b | 534 | spec_ioctl(ap) |
91447636 | 535 | struct vnop_ioctl_args /* { |
1c79356b A |
536 | struct vnode *a_vp; |
537 | int a_command; | |
538 | caddr_t a_data; | |
539 | int a_fflag; | |
91447636 | 540 | vfs_context_t a_context; |
1c79356b A |
541 | } */ *ap; |
542 | { | |
91447636 | 543 | proc_t p = vfs_context_proc(ap->a_context); |
1c79356b A |
544 | dev_t dev = ap->a_vp->v_rdev; |
545 | ||
546 | switch (ap->a_vp->v_type) { | |
547 | ||
548 | case VCHR: | |
549 | return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, | |
91447636 | 550 | ap->a_fflag, p)); |
1c79356b A |
551 | |
552 | case VBLK: | |
91447636 | 553 | if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) { |
1c79356b A |
554 | if (bdevsw[major(dev)].d_type == D_TAPE) |
555 | return (0); | |
556 | else | |
557 | return (1); | |
91447636 | 558 | } |
1c79356b | 559 | return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, |
91447636 | 560 | ap->a_fflag, p)); |
1c79356b A |
561 | |
562 | default: | |
563 | panic("spec_ioctl"); | |
564 | /* NOTREACHED */ | |
565 | } | |
91447636 | 566 | return (0); |
1c79356b A |
567 | } |
568 | ||
91447636 | 569 | int |
1c79356b | 570 | spec_select(ap) |
91447636 | 571 | struct vnop_select_args /* { |
1c79356b A |
572 | struct vnode *a_vp; |
573 | int a_which; | |
574 | int a_fflags; | |
0b4e3aa0 | 575 | void * a_wql; |
91447636 | 576 | vfs_context_t a_context; |
1c79356b A |
577 | } */ *ap; |
578 | { | |
91447636 | 579 | proc_t p = vfs_context_proc(ap->a_context); |
1c79356b A |
580 | register dev_t dev; |
581 | ||
582 | switch (ap->a_vp->v_type) { | |
583 | ||
584 | default: | |
585 | return (1); /* XXX */ | |
586 | ||
587 | case VCHR: | |
588 | dev = ap->a_vp->v_rdev; | |
91447636 | 589 | return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_wql, p); |
1c79356b A |
590 | } |
591 | } | |
91447636 | 592 | |
1c79356b A |
593 | /* |
594 | * Synch buffers associated with a block device | |
595 | */ | |
1c79356b | 596 | int |
91447636 | 597 | spec_fsync_internal(vnode_t vp, int waitfor, __unused vfs_context_t context) |
1c79356b | 598 | { |
1c79356b A |
599 | if (vp->v_type == VCHR) |
600 | return (0); | |
601 | /* | |
602 | * Flush all dirty buffers associated with a block device. | |
603 | */ | |
91447636 A |
604 | buf_flushdirtyblks(vp, waitfor == MNT_WAIT, 0, (char *)"spec_fsync"); |
605 | ||
1c79356b A |
606 | return (0); |
607 | } | |
608 | ||
91447636 A |
609 | int |
610 | spec_fsync(ap) | |
611 | struct vnop_fsync_args /* { | |
612 | struct vnode *a_vp; | |
613 | int a_waitfor; | |
614 | vfs_context_t a_context; | |
615 | } */ *ap; | |
616 | { | |
617 | return spec_fsync_internal(ap->a_vp, ap->a_waitfor, ap->a_context); | |
618 | } | |
619 | ||
1c79356b A |
620 | /* |
621 | * Just call the device strategy routine | |
622 | */ | |
91447636 A |
623 | extern int hard_throttle_on_root; |
624 | ||
625 | ||
626 | #define LOWPRI_DELAY_MSECS 200 | |
627 | #define LOWPRI_WINDOW_MSECS 200 | |
628 | ||
629 | int lowpri_IO_window_msecs = LOWPRI_WINDOW_MSECS; | |
630 | int lowpri_IO_delay_msecs = LOWPRI_DELAY_MSECS; | |
631 | ||
632 | struct timeval last_normal_IO_timestamp; | |
633 | struct timeval last_lowpri_IO_timestamp; | |
634 | struct timeval lowpri_IO_window = { 0, LOWPRI_WINDOW_MSECS * 1000 }; | |
635 | ||
636 | int | |
1c79356b | 637 | spec_strategy(ap) |
91447636 | 638 | struct vnop_strategy_args /* { |
1c79356b A |
639 | struct buf *a_bp; |
640 | } */ *ap; | |
641 | { | |
91447636 A |
642 | buf_t bp; |
643 | int bflags; | |
644 | dev_t bdev; | |
645 | proc_t p; | |
646 | struct timeval elapsed; | |
9bccf70c A |
647 | |
648 | bp = ap->a_bp; | |
91447636 A |
649 | bdev = buf_device(bp); |
650 | bflags = buf_flags(bp); | |
9bccf70c A |
651 | |
652 | if (kdebug_enable) { | |
91447636 | 653 | int code = 0; |
9bccf70c | 654 | |
91447636 A |
655 | if (bflags & B_READ) |
656 | code |= DKIO_READ; | |
657 | if (bflags & B_ASYNC) | |
658 | code |= DKIO_ASYNC; | |
9bccf70c | 659 | |
91447636 A |
660 | if (bflags & B_META) |
661 | code |= DKIO_META; | |
662 | else if (bflags & B_PAGEIO) | |
663 | code |= DKIO_PAGING; | |
9bccf70c | 664 | |
91447636 A |
665 | KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, |
666 | (unsigned int)bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0); | |
9bccf70c | 667 | } |
91447636 A |
668 | if (((bflags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) && |
669 | (buf_vnode(bp)->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) | |
670 | hard_throttle_on_root = 1; | |
671 | ||
672 | if ( lowpri_IO_delay_msecs && lowpri_IO_window_msecs ) { | |
673 | p = current_proc(); | |
674 | ||
675 | if ( (p == NULL) || !(p->p_lflag & P_LLOW_PRI_IO)) { | |
676 | if (!(p->p_lflag & P_LBACKGROUND_IO)) | |
677 | microuptime(&last_normal_IO_timestamp); | |
678 | } else { | |
679 | microuptime(&last_lowpri_IO_timestamp); | |
680 | ||
681 | elapsed = last_lowpri_IO_timestamp; | |
682 | timevalsub(&elapsed, &last_normal_IO_timestamp); | |
683 | ||
684 | lowpri_IO_window.tv_sec = lowpri_IO_window_msecs / 1000; | |
685 | lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000; | |
686 | ||
687 | if (timevalcmp(&elapsed, &lowpri_IO_window, <)) { | |
688 | struct uthread *ut; | |
689 | ||
690 | /* | |
691 | * I'd really like to do the IOSleep here, but | |
692 | * we may be holding all kinds of filesystem related locks | |
693 | * and the pages for this I/O marked 'busy'... | |
694 | * we don't want to cause a normal task to block on | |
695 | * one of these locks while we're throttling a task marked | |
696 | * for low priority I/O... we'll mark the uthread and | |
697 | * do the delay just before we return from the system | |
698 | * call that triggered this I/O or from vnode_pagein | |
699 | */ | |
700 | ut = get_bsdthread_info(current_thread()); | |
701 | ut->uu_lowpri_delay = lowpri_IO_delay_msecs; | |
702 | } | |
ccc36f2f A |
703 | } |
704 | } | |
91447636 | 705 | (*bdevsw[major(bdev)].d_strategy)(bp); |
ccc36f2f | 706 | |
91447636 | 707 | return (0); |
ccc36f2f A |
708 | } |
709 | ||
1c79356b A |
710 | |
711 | /* | |
712 | * This is a noop, simply returning what one has been given. | |
713 | */ | |
91447636 A |
714 | int |
715 | spec_blockmap(__unused struct vnop_blockmap_args *ap) | |
1c79356b | 716 | { |
91447636 | 717 | return (ENOTSUP); |
1c79356b A |
718 | } |
719 | ||
720 | ||
721 | /* | |
722 | * Device close routine | |
723 | */ | |
91447636 | 724 | int |
1c79356b | 725 | spec_close(ap) |
91447636 | 726 | struct vnop_close_args /* { |
1c79356b A |
727 | struct vnode *a_vp; |
728 | int a_fflag; | |
91447636 | 729 | vfs_context_t a_context; |
1c79356b A |
730 | } */ *ap; |
731 | { | |
732 | register struct vnode *vp = ap->a_vp; | |
733 | dev_t dev = vp->v_rdev; | |
91447636 | 734 | int (*devclose)(dev_t, int, int, struct proc *); |
1c79356b | 735 | int mode, error; |
91447636 | 736 | struct proc *p = vfs_context_proc(ap->a_context); |
1c79356b A |
737 | |
738 | switch (vp->v_type) { | |
739 | ||
740 | case VCHR: | |
741 | /* | |
742 | * Hack: a tty device that is a controlling terminal | |
743 | * has a reference from the session structure. | |
744 | * We cannot easily tell that a character device is | |
745 | * a controlling terminal, unless it is the closing | |
746 | * process' controlling terminal. In that case, | |
747 | * if the reference count is 2 (this last descriptor | |
748 | * plus the session), release the reference from the session. | |
749 | */ | |
91447636 A |
750 | if (vcount(vp) == 2 && p && |
751 | vp == p->p_session->s_ttyvp) { | |
752 | p->p_session->s_ttyvp = NULL; | |
753 | vnode_rele(vp); | |
1c79356b A |
754 | } |
755 | /* | |
1c79356b A |
756 | * close on last reference. |
757 | */ | |
91447636 | 758 | if (vcount(vp) > 1) |
1c79356b A |
759 | return (0); |
760 | devclose = cdevsw[major(dev)].d_close; | |
761 | mode = S_IFCHR; | |
762 | break; | |
763 | ||
764 | case VBLK: | |
0b4e3aa0 | 765 | #ifdef DEVFS_IMPLEMENTS_LOCKING |
1c79356b A |
766 | /* |
767 | * On last close of a block device (that isn't mounted) | |
768 | * we must invalidate any in core blocks, so that | |
769 | * we can, for instance, change floppy disks. | |
770 | */ | |
91447636 A |
771 | if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context))) |
772 | return (error); | |
773 | ||
774 | error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); | |
0b4e3aa0 | 775 | if (error) |
1c79356b A |
776 | return (error); |
777 | /* | |
91447636 | 778 | * Since every use (buffer, vnode, swap, blockmap) |
1c79356b A |
779 | * holds a reference to the vnode, and because we mark |
780 | * any other vnodes that alias this device, when the | |
781 | * sum of the reference counts on all the aliased | |
782 | * vnodes descends to one, we are on last close. | |
783 | */ | |
91447636 | 784 | if (vcount(vp) > 1) |
1c79356b | 785 | return (0); |
0b4e3aa0 A |
786 | #else /* DEVFS_IMPLEMENTS_LOCKING */ |
787 | /* | |
91447636 | 788 | * Since every use (buffer, vnode, swap, blockmap) |
0b4e3aa0 A |
789 | * holds a reference to the vnode, and because we mark |
790 | * any other vnodes that alias this device, when the | |
791 | * sum of the reference counts on all the aliased | |
792 | * vnodes descends to one, we are on last close. | |
793 | */ | |
91447636 | 794 | if (vcount(vp) > 1) |
0b4e3aa0 A |
795 | return (0); |
796 | ||
797 | /* | |
798 | * On last close of a block device (that isn't mounted) | |
799 | * we must invalidate any in core blocks, so that | |
800 | * we can, for instance, change floppy disks. | |
801 | */ | |
91447636 A |
802 | if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context))) |
803 | return (error); | |
804 | ||
805 | error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); | |
0b4e3aa0 A |
806 | if (error) |
807 | return (error); | |
808 | #endif /* DEVFS_IMPLEMENTS_LOCKING */ | |
1c79356b A |
809 | devclose = bdevsw[major(dev)].d_close; |
810 | mode = S_IFBLK; | |
811 | break; | |
812 | ||
813 | default: | |
814 | panic("spec_close: not special"); | |
815 | } | |
816 | ||
91447636 | 817 | return ((*devclose)(dev, ap->a_fflag, mode, p)); |
1c79356b A |
818 | } |
819 | ||
820 | /* | |
821 | * Return POSIX pathconf information applicable to special devices. | |
822 | */ | |
91447636 | 823 | int |
1c79356b | 824 | spec_pathconf(ap) |
91447636 | 825 | struct vnop_pathconf_args /* { |
1c79356b A |
826 | struct vnode *a_vp; |
827 | int a_name; | |
828 | int *a_retval; | |
91447636 | 829 | vfs_context_t a_context; |
1c79356b A |
830 | } */ *ap; |
831 | { | |
832 | ||
833 | switch (ap->a_name) { | |
834 | case _PC_LINK_MAX: | |
835 | *ap->a_retval = LINK_MAX; | |
836 | return (0); | |
837 | case _PC_MAX_CANON: | |
838 | *ap->a_retval = MAX_CANON; | |
839 | return (0); | |
840 | case _PC_MAX_INPUT: | |
841 | *ap->a_retval = MAX_INPUT; | |
842 | return (0); | |
843 | case _PC_PIPE_BUF: | |
844 | *ap->a_retval = PIPE_BUF; | |
845 | return (0); | |
846 | case _PC_CHOWN_RESTRICTED: | |
847 | *ap->a_retval = 1; | |
848 | return (0); | |
849 | case _PC_VDISABLE: | |
850 | *ap->a_retval = _POSIX_VDISABLE; | |
851 | return (0); | |
852 | default: | |
853 | return (EINVAL); | |
854 | } | |
855 | /* NOTREACHED */ | |
856 | } | |
857 | ||
858 | int | |
859 | spec_devblocksize(ap) | |
91447636 | 860 | struct vnop_devblocksize_args /* { |
1c79356b A |
861 | struct vnode *a_vp; |
862 | int *a_retval; | |
863 | } */ *ap; | |
864 | { | |
865 | *ap->a_retval = (ap->a_vp->v_specsize); | |
866 | return (0); | |
867 | } | |
868 | ||
869 | /* | |
870 | * Special device failed operation | |
871 | */ | |
91447636 A |
872 | int |
873 | spec_ebadf(__unused void *dummy) | |
1c79356b A |
874 | { |
875 | ||
876 | return (EBADF); | |
877 | } | |
878 | ||
/*
 * Special device bad operation: calling this is treated as a fatal
 * error and panics.
 */
int
spec_badop(void)
{

	panic("spec_badop called");
	/* NOTREACHED */

	/*
	 * Not expected to be reached; present so this non-void function
	 * has a return statement, like the other panic paths in this file.
	 */
	return (ENOTSUP);
}
889 | ||
890 | /* Blktooff derives file offset from logical block number */ | |
891 | int | |
892 | spec_blktooff(ap) | |
91447636 | 893 | struct vnop_blktooff_args /* { |
1c79356b | 894 | struct vnode *a_vp; |
91447636 | 895 | daddr64_t a_lblkno; |
1c79356b A |
896 | off_t *a_offset; |
897 | } */ *ap; | |
898 | { | |
899 | register struct vnode *vp = ap->a_vp; | |
900 | ||
901 | switch (vp->v_type) { | |
902 | case VCHR: | |
903 | *ap->a_offset = (off_t)-1; /* failure */ | |
91447636 | 904 | return (ENOTSUP); |
1c79356b A |
905 | |
906 | case VBLK: | |
907 | printf("spec_blktooff: not implemented for VBLK\n"); | |
908 | *ap->a_offset = (off_t)-1; /* failure */ | |
91447636 | 909 | return (ENOTSUP); |
1c79356b A |
910 | |
911 | default: | |
912 | panic("spec_blktooff type"); | |
913 | } | |
914 | /* NOTREACHED */ | |
91447636 A |
915 | |
916 | return (0); | |
1c79356b A |
917 | } |
918 | ||
919 | /* Offtoblk derives logical block number from file offset */ | |
920 | int | |
921 | spec_offtoblk(ap) | |
91447636 | 922 | struct vnop_offtoblk_args /* { |
1c79356b A |
923 | struct vnode *a_vp; |
924 | off_t a_offset; | |
91447636 | 925 | daddr64_t *a_lblkno; |
1c79356b A |
926 | } */ *ap; |
927 | { | |
928 | register struct vnode *vp = ap->a_vp; | |
929 | ||
930 | switch (vp->v_type) { | |
931 | case VCHR: | |
91447636 A |
932 | *ap->a_lblkno = (daddr64_t)-1; /* failure */ |
933 | return (ENOTSUP); | |
1c79356b A |
934 | |
935 | case VBLK: | |
936 | printf("spec_offtoblk: not implemented for VBLK\n"); | |
91447636 A |
937 | *ap->a_lblkno = (daddr64_t)-1; /* failure */ |
938 | return (ENOTSUP); | |
1c79356b A |
939 | |
940 | default: | |
941 | panic("spec_offtoblk type"); | |
942 | } | |
943 | /* NOTREACHED */ | |
91447636 A |
944 | |
945 | return (0); | |
1c79356b | 946 | } |